Repository: tidymodels/infer
Branch: main
Commit: 1e5cabee6f83
Files: 155
Total size: 768.6 KB

Directory structure:
gitextract__66l93tt/

├── .Rbuildignore
├── .gitattributes
├── .github/
│   ├── .gitignore
│   ├── CODE_OF_CONDUCT.md
│   └── workflows/
│       ├── R-CMD-check.yaml
│       ├── check-hard.yaml
│       ├── lock.yaml
│       ├── pkgdown.yaml
│       ├── pr-commands.yaml
│       └── test-coverage.yaml
├── .gitignore
├── .vscode/
│   ├── extensions.json
│   └── settings.json
├── CONTRIBUTING.md
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── NEWS.md
├── R/
│   ├── assume.R
│   ├── calculate.R
│   ├── deprecated.R
│   ├── fit.R
│   ├── generate.R
│   ├── get_confidence_interval.R
│   ├── get_p_value.R
│   ├── gss.R
│   ├── hypothesize.R
│   ├── infer.R
│   ├── observe.R
│   ├── pipe.R
│   ├── print_methods.R
│   ├── rep_sample_n.R
│   ├── set_params.R
│   ├── shade_confidence_interval.R
│   ├── shade_p_value.R
│   ├── specify.R
│   ├── utils.R
│   ├── visualize.R
│   └── wrappers.R
├── README.Rmd
├── README.md
├── _pkgdown.yml
├── air.toml
├── codecov.yml
├── data/
│   └── gss.rda
├── data-raw/
│   └── save_gss.R
├── figs/
│   ├── paper/
│   │   ├── apa.csl
│   │   ├── columns.tex
│   │   ├── paper.Rmd
│   │   ├── paper.bib
│   │   ├── paper.log
│   │   └── paper.md
│   └── rethinking-inference.key
├── infer.Rproj
├── inst/
│   └── CITATION
├── man/
│   ├── assume.Rd
│   ├── calculate.Rd
│   ├── chisq_stat.Rd
│   ├── chisq_test.Rd
│   ├── deprecated.Rd
│   ├── fit.infer.Rd
│   ├── generate.Rd
│   ├── get_confidence_interval.Rd
│   ├── get_p_value.Rd
│   ├── gss.Rd
│   ├── hypothesize.Rd
│   ├── infer.Rd
│   ├── observe.Rd
│   ├── pipe.Rd
│   ├── print.infer.Rd
│   ├── prop_test.Rd
│   ├── reexports.Rd
│   ├── rep_sample_n.Rd
│   ├── shade_confidence_interval.Rd
│   ├── shade_p_value.Rd
│   ├── specify.Rd
│   ├── t_stat.Rd
│   ├── t_test.Rd
│   └── visualize.Rd
├── man-roxygen/
│   └── seeds.Rmd
├── tests/
│   ├── testthat/
│   │   ├── _snaps/
│   │   │   ├── aliases.md
│   │   │   ├── assume.md
│   │   │   ├── calculate.md
│   │   │   ├── fit.md
│   │   │   ├── generate.md
│   │   │   ├── get_confidence_interval.md
│   │   │   ├── get_p_value.md
│   │   │   ├── hypothesize.md
│   │   │   ├── observe.md
│   │   │   ├── print.md
│   │   │   ├── rep_sample_n.md
│   │   │   ├── shade_confidence_interval.md
│   │   │   ├── shade_p_value.md
│   │   │   ├── specify.md
│   │   │   ├── utils.md
│   │   │   ├── visualize.md
│   │   │   └── wrappers.md
│   │   ├── helper-data.R
│   │   ├── setup.R
│   │   ├── test-aliases.R
│   │   ├── test-assume.R
│   │   ├── test-calculate.R
│   │   ├── test-fit.R
│   │   ├── test-generate.R
│   │   ├── test-get_confidence_interval.R
│   │   ├── test-get_p_value.R
│   │   ├── test-hypothesize.R
│   │   ├── test-observe.R
│   │   ├── test-print.R
│   │   ├── test-rep_sample_n.R
│   │   ├── test-shade_confidence_interval.R
│   │   ├── test-shade_p_value.R
│   │   ├── test-specify.R
│   │   ├── test-utils.R
│   │   ├── test-visualize.R
│   │   └── test-wrappers.R
│   └── testthat.R
└── vignettes/
    ├── anova.Rmd
    ├── chi_squared.Rmd
    ├── infer.Rmd
    ├── infer_cache/
    │   └── html/
    │       ├── __packages
    │       ├── calculate-point_94c073b633c3cf7bef3252dcad544ee2.RData
    │       ├── calculate-point_94c073b633c3cf7bef3252dcad544ee2.rdb
    │       ├── calculate-point_94c073b633c3cf7bef3252dcad544ee2.rdx
    │       ├── generate-permute_21b25928d642a97a30057306d51f1b23.RData
    │       ├── generate-permute_21b25928d642a97a30057306d51f1b23.rdb
    │       ├── generate-permute_21b25928d642a97a30057306d51f1b23.rdx
    │       ├── generate-point_d562524427be20dbb4736ca1ea29b04b.RData
    │       ├── generate-point_d562524427be20dbb4736ca1ea29b04b.rdb
    │       ├── generate-point_d562524427be20dbb4736ca1ea29b04b.rdx
    │       ├── hypothesize-40-hr-week_c8e33c404efa90c2ca0b2eacad95b06c.RData
    │       ├── hypothesize-40-hr-week_c8e33c404efa90c2ca0b2eacad95b06c.rdb
    │       ├── hypothesize-40-hr-week_c8e33c404efa90c2ca0b2eacad95b06c.rdx
    │       ├── hypothesize-independence_fe1c79b9f1dc0df488828fdd34c8145f.RData
    │       ├── hypothesize-independence_fe1c79b9f1dc0df488828fdd34c8145f.rdb
    │       ├── hypothesize-independence_fe1c79b9f1dc0df488828fdd34c8145f.rdx
    │       ├── specify-diff-in-means_e4103c4c3e3daedd5c1429b7a1bc8727.RData
    │       ├── specify-diff-in-means_e4103c4c3e3daedd5c1429b7a1bc8727.rdb
    │       ├── specify-diff-in-means_e4103c4c3e3daedd5c1429b7a1bc8727.rdx
    │       ├── specify-example_3ea3cfa390233b127dc25b05b0354bcf.RData
    │       ├── specify-example_3ea3cfa390233b127dc25b05b0354bcf.rdb
    │       ├── specify-example_3ea3cfa390233b127dc25b05b0354bcf.rdx
    │       ├── specify-one_149be66261b0606b7ddb80efd10fa81d.RData
    │       ├── specify-one_149be66261b0606b7ddb80efd10fa81d.rdb
    │       ├── specify-one_149be66261b0606b7ddb80efd10fa81d.rdx
    │       ├── specify-success_e8eb15e9f621ccf60cb6527a6bccdb4b.RData
    │       ├── specify-success_e8eb15e9f621ccf60cb6527a6bccdb4b.rdb
    │       ├── specify-success_e8eb15e9f621ccf60cb6527a6bccdb4b.rdx
    │       ├── specify-two_20085531c110a936ee691162f225333b.RData
    │       ├── specify-two_20085531c110a936ee691162f225333b.rdb
    │       └── specify-two_20085531c110a936ee691162f225333b.rdx
    ├── observed_stat_examples.Rmd
    ├── paired.Rmd
    └── t_test.Rmd

================================================
FILE CONTENTS
================================================

================================================
FILE: .Rbuildignore
================================================
^CRAN-RELEASE$
^.*\.Rproj$
^\.Rproj\.user$
^README\.Rmd$
^figs$
^profiles*
^examples*
^codecov\.yml$
^docs*
^CONDUCT\.md$
^cran-comments\.md$
^_build\.sh$
^appveyor\.yml$
^\.implement_new_methods\.md
^CONTRIBUTING\.md$
^TO-DO\.md$
^\.httr-oauth$
^_pkgdown.yml
^_pkgdown\.yml$
^docs$
^data-raw*
^doc$
^Meta$
README_files/
^pkgdown$
^\.github$
^LICENSE\.md$
^man-roxygen$
^[\.]?air\.toml$
^\.vscode$
inst/hex/


================================================
FILE: .gitattributes
================================================
* text=auto
data/* binary
src/* text=lf
R/* text=lf

================================================
FILE: .github/.gitignore
================================================
*.html


================================================
FILE: .github/CODE_OF_CONDUCT.md
================================================
# Contributor Covenant Code of Conduct

## Our Pledge

We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, caste, color, religion, or sexual
identity and orientation.

We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.

## Our Standards

Examples of behavior that contributes to a positive environment for our
community include:

* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
  and learning from the experience
* Focusing on what is best not just for us as individuals, but for the overall
  community

Examples of unacceptable behavior include:

* The use of sexualized language or imagery, and sexual attention or advances of
  any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email address,
  without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
  professional setting

## Enforcement Responsibilities

Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.

Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.

## Scope

This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at codeofconduct@posit.co. 
All complaints will be reviewed and investigated promptly and fairly.

All community leaders are obligated to respect the privacy and security of the
reporter of any incident.

## Enforcement Guidelines

Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:

### 1. Correction

**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.

**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.

### 2. Warning

**Community Impact**: A violation through a single incident or series of
actions.

**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or permanent
ban.

### 3. Temporary Ban

**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.

**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.

### 4. Permanent Ban

**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.

**Consequence**: A permanent ban from any sort of public interaction within the
community.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.1, available at
<https://www.contributor-covenant.org/version/2/1/code_of_conduct.html>.

Community Impact Guidelines were inspired by
[Mozilla's code of conduct enforcement ladder][https://github.com/mozilla/inclusion].

For answers to common questions about this code of conduct, see the FAQ at
<https://www.contributor-covenant.org/faq>. Translations are available at <https://www.contributor-covenant.org/translations>.

[homepage]: https://www.contributor-covenant.org


================================================
FILE: .github/workflows/R-CMD-check.yaml
================================================
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
#
# NOTE: This workflow is overkill for most R packages and
# check-standard.yaml is likely a better choice.
# usethis::use_github_action("check-standard") will install it.
on:
  push:
    branches: [main, master]
  pull_request:

name: R-CMD-check.yaml

permissions: read-all

jobs:
  R-CMD-check:
    runs-on: ${{ matrix.config.os }}

    name: ${{ matrix.config.os }} (${{ matrix.config.r }})

    strategy:
      fail-fast: false
      matrix:
        config:
          - {os: macos-latest,   r: 'release'}

          - {os: windows-latest, r: 'release'}

          - {os: ubuntu-latest,  r: 'devel', http-user-agent: 'release'}
          - {os: ubuntu-latest,  r: 'release'}
          - {os: ubuntu-latest,  r: 'oldrel-1'}
          - {os: ubuntu-latest,  r: 'oldrel-2'}
          - {os: ubuntu-latest,  r: 'oldrel-3'}
          - {os: ubuntu-latest,  r: 'oldrel-4'}

    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
      R_KEEP_PKG_SOURCE: yes

    steps:
      - uses: actions/checkout@v4

      - uses: r-lib/actions/setup-pandoc@v2

      - uses: r-lib/actions/setup-r@v2
        with:
          r-version: ${{ matrix.config.r }}
          http-user-agent: ${{ matrix.config.http-user-agent }}
          use-public-rspm: true

      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          extra-packages: any::rcmdcheck
          needs: check

      - uses: r-lib/actions/check-r-package@v2
        with:
          upload-snapshots: true
          build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'


================================================
FILE: .github/workflows/check-hard.yaml
================================================
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
#
# NOTE: This workflow only directly installs "hard" dependencies, i.e. Depends,
# Imports, and LinkingTo dependencies. Notably, Suggests dependencies are never
# installed, with the exception of testthat, knitr, and rmarkdown. The cache is
# never used to avoid accidentally restoring a cache containing a suggested
# dependency.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

name: R-CMD-check-hard

jobs:
  R-CMD-check:
    runs-on: ${{ matrix.config.os }}

    name: ${{ matrix.config.os }} (${{ matrix.config.r }})

    strategy:
      fail-fast: false
      matrix:
        config:
          - {os: ubuntu-latest,   r: 'release'}

    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
      R_KEEP_PKG_SOURCE: yes

    steps:
      - uses: actions/checkout@v2

      - uses: r-lib/actions/setup-pandoc@v2

      - uses: r-lib/actions/setup-r@v2
        with:
          r-version: ${{ matrix.config.r }}
          http-user-agent: ${{ matrix.config.http-user-agent }}
          use-public-rspm: true

      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          dependencies: '"hard"'
          cache: false
          extra-packages: |
            any::rcmdcheck
            any::testthat
            any::knitr
            any::rmarkdown
          needs: check

      - uses: r-lib/actions/check-r-package@v2
        with:
          upload-snapshots: true

================================================
FILE: .github/workflows/lock.yaml
================================================
name: 'Lock Threads'

on:
  schedule:
    - cron: '0 0 * * *'

jobs:
  lock:
    runs-on: ubuntu-latest
    steps:
      - uses: dessant/lock-threads@v2
        with:
          github-token: ${{ github.token }}
          issue-lock-inactive-days: '14'
#          issue-exclude-labels: ''
#          issue-lock-labels: 'outdated'
          issue-lock-comment: >
            This issue has been automatically locked. If you believe you have
            found a related problem, please file a new issue (with a reprex:
            <https://reprex.tidyverse.org>) and link to this issue.
          issue-lock-reason: ''
          pr-lock-inactive-days: '14'
#          pr-exclude-labels: 'wip'
          pr-lock-labels: ''
          pr-lock-comment: >
            This pull request has been automatically locked. If you believe you
            have found a related problem, please file a new issue (with a reprex:
            <https://reprex.tidyverse.org>) and link to this issue.
          pr-lock-reason: ''
#          process-only: 'issues'


================================================
FILE: .github/workflows/pkgdown.yaml
================================================
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
  push:
    branches: [main, master]
  pull_request:
  release:
    types: [published]
  workflow_dispatch:

name: pkgdown.yaml

permissions: read-all

jobs:
  pkgdown:
    runs-on: ubuntu-latest
    # Only restrict concurrency for non-PR jobs
    concurrency:
      group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v4

      - uses: r-lib/actions/setup-pandoc@v2

      - uses: r-lib/actions/setup-r@v2
        with:
          use-public-rspm: true

      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          extra-packages: any::pkgdown, local::.
          needs: website

      - name: Build site
        run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
        shell: Rscript {0}

      - name: Deploy to GitHub pages 🚀
        if: github.event_name != 'pull_request'
        uses: JamesIves/github-pages-deploy-action@v4.5.0
        with:
          clean: false
          branch: gh-pages
          folder: docs


================================================
FILE: .github/workflows/pr-commands.yaml
================================================
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
  issue_comment:
    types: [created]

name: pr-commands.yaml

permissions: read-all

jobs:
  document:
    if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }}
    name: document
    runs-on: ubuntu-latest
    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v4

      - uses: r-lib/actions/pr-fetch@v2
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}

      - uses: r-lib/actions/setup-r@v2
        with:
          use-public-rspm: true

      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          extra-packages: any::roxygen2
          needs: pr-document

      - name: Document
        run: roxygen2::roxygenise()
        shell: Rscript {0}

      - name: commit
        run: |
          git config --local user.name "$GITHUB_ACTOR"
          git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
          git add man/\* NAMESPACE
          git commit -m 'Document'

      - uses: r-lib/actions/pr-push@v2
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}

  style:
    if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }}
    name: style
    runs-on: ubuntu-latest
    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v4

      - uses: r-lib/actions/pr-fetch@v2
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}

      - uses: r-lib/actions/setup-r@v2

      - name: Install dependencies
        run: install.packages("styler")
        shell: Rscript {0}

      - name: Style
        run: styler::style_pkg()
        shell: Rscript {0}

      - name: commit
        run: |
          git config --local user.name "$GITHUB_ACTOR"
          git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
          git add \*.R
          git commit -m 'Style'

      - uses: r-lib/actions/pr-push@v2
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}


================================================
FILE: .github/workflows/test-coverage.yaml
================================================
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
  push:
    branches: [main, master]
  pull_request:

name: test-coverage.yaml

permissions: read-all

jobs:
  test-coverage:
    runs-on: ubuntu-latest
    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

    steps:
      - uses: actions/checkout@v4

      - uses: r-lib/actions/setup-r@v2
        with:
          use-public-rspm: true

      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          extra-packages: any::covr, any::xml2
          needs: coverage

      - name: Test coverage
        run: |
          cov <- covr::package_coverage(
            quiet = FALSE,
            clean = FALSE,
            install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
          )
          print(cov)
          covr::to_cobertura(cov)
        shell: Rscript {0}

      - uses: codecov/codecov-action@v5
        with:
          # Fail if error if not on PR, or if on PR and token is given
          fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }}
          files: ./cobertura.xml
          plugins: noop
          disable_search: true
          token: ${{ secrets.CODECOV_TOKEN }}

      - name: Show testthat output
        if: always()
        run: |
          ## --------------------------------------------------------------------
          find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true
        shell: bash

      - name: Upload test results
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: coverage-test-failures
          path: ${{ runner.temp }}/package


================================================
FILE: .gitignore
================================================
.Rproj.user
.Rhistory
.RData
.Ruserdata
.DS_Store
.httr-oauth
doc
Meta
docs
*.psd


================================================
FILE: .vscode/extensions.json
================================================
{
    "recommendations": [
        "Posit.air-vscode"
    ]
}


================================================
FILE: .vscode/settings.json
================================================
{
    "[r]": {
        "editor.formatOnSave": true,
        "editor.defaultFormatter": "Posit.air-vscode"
    }
}


================================================
FILE: CONTRIBUTING.md
================================================
# Contributing

Contributions to the `infer` whether in the form of bug fixes, issue reports, new
code or documentation improvements are encouraged and welcome. We welcome novices
who may have never contributed to a package before as well as friendly
veterans looking to help us improve the package for users. We are eager to include
and accepting of contributions from everyone that meets our [code of conduct](.github/CODE_OF_CONDUCT.md)
guidelines.

Please use the GitHub issues. For any pull request, please link to or open a
corresponding issue in GitHub issues. Please ensure that you have notifications
turned on and respond to questions, comments or needed changes promptly.

##  Tests

`infer` uses `testthat` for testing. Please try to provide 100% test coverage
for any submitted code and always check that existing tests continue to pass.
If you are a beginner and need help with writing a test, mention this
in the issue and we will try to help.

It's also helpful to run `goodpractice::gp()` to ensure that lines of code are
not over 80 characters and that all lines of code have tests written. Please do
so prior to submitting any pull request and fix any suggestions from there.
Reach out to us if you need any assistance there too.

## Code style

Please use snake case (such as `rep_sample_n`) for function names.
Besides that, in general follow the 
[tidyverse style](http://style.tidyverse.org/) for R. 

## Code of Conduct

When contributing to the `infer` package you must follow the code of 
conduct defined in [CONDUCT](.github/CODE_OF_CONDUCT.md).


================================================
FILE: DESCRIPTION
================================================
Type: Package
Package: infer
Title: Tidy Statistical Inference
Version: 1.1.0.9000
Authors@R: c(
    person("Andrew", "Bray", , "abray@reed.edu", role = "aut"),
    person("Chester", "Ismay", , "chester.ismay@gmail.com", role = "aut",
           comment = c(ORCID = "0000-0003-2820-2547")),
    person("Evgeni", "Chasnovski", , "evgeni.chasnovski@gmail.com", role = "aut",
           comment = c(ORCID = "0000-0002-1617-4019")),
    person("Simon", "Couch", , "simon.couch@posit.co", role = c("aut", "cre"),
           comment = c(ORCID = "0000-0001-5676-5107")),
    person("Ben", "Baumer", , "ben.baumer@gmail.com", role = "aut",
           comment = c(ORCID = "0000-0002-3279-0516")),
    person("Mine", "Cetinkaya-Rundel", , "mine@stat.duke.edu", role = "aut",
           comment = c(ORCID = "0000-0001-6452-2420")),
    person("Ted", "Laderas", , "tedladeras@gmail.com", role = "ctb",
           comment = c(ORCID = "0000-0002-6207-7068")),
    person("Nick", "Solomon", , "nick.solomon@datacamp.com", role = "ctb"),
    person("Johanna", "Hardin", , "Jo.Hardin@pomona.edu", role = "ctb"),
    person("Albert Y.", "Kim", , "albert.ys.kim@gmail.com", role = "ctb",
           comment = c(ORCID = "0000-0001-7824-306X")),
    person("Neal", "Fultz", , "nfultz@gmail.com", role = "ctb"),
    person("Doug", "Friedman", , "doug.nhp@gmail.com", role = "ctb"),
    person("Richie", "Cotton", , "richie@datacamp.com", role = "ctb",
           comment = c(ORCID = "0000-0003-2504-802X")),
    person("Brian", "Fannin", , "captain@pirategrunt.com", role = "ctb")
  )
Description: The objective of this package is to perform inference using
    an expressive statistical grammar that coheres with the tidy design
    framework.
License: MIT + file LICENSE
URL: https://github.com/tidymodels/infer, https://infer.tidymodels.org/
BugReports: https://github.com/tidymodels/infer/issues
Depends:
    R (>= 4.1)
Imports:
    broom,
    cli,
    dplyr (>= 0.7.0),
    generics,
    ggplot2 (>= 3.5.2),
    glue (>= 1.3.0),
    grDevices,
    lifecycle,
    magrittr,
    methods,
    patchwork,
    purrr,
    rlang (>= 0.2.0),
    tibble,
    tidyr,
    vctrs (>= 0.6.5),
    withr
Suggests:
    covr,
    devtools (>= 1.12.0),
    fs,
    knitr,
    nycflights13,
    parsnip,
    rmarkdown,
    stringr,
    testthat (>= 3.0.0),
    vdiffr (>= 1.0.0)
VignetteBuilder:
    knitr
Config/Needs/website: tidyverse/tidytemplate
Config/testthat/edition: 3
Config/usethis/last-upkeep: 2025-04-25
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.3


================================================
FILE: LICENSE
================================================
YEAR: 2025
COPYRIGHT HOLDER: infer authors


================================================
FILE: LICENSE.md
================================================
# MIT License

Copyright (c) 2025 infer authors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: NAMESPACE
================================================
# Generated by roxygen2: do not edit by hand

S3method(calc_impl,"function")
S3method(calc_impl,Chisq)
S3method(calc_impl,F)
S3method(calc_impl,correlation)
S3method(calc_impl,count)
S3method(calc_impl,diff_in_means)
S3method(calc_impl,diff_in_medians)
S3method(calc_impl,diff_in_props)
S3method(calc_impl,function_of_props)
S3method(calc_impl,mean)
S3method(calc_impl,median)
S3method(calc_impl,odds_ratio)
S3method(calc_impl,prop)
S3method(calc_impl,ratio_of_means)
S3method(calc_impl,ratio_of_props)
S3method(calc_impl,sd)
S3method(calc_impl,slope)
S3method(calc_impl,sum)
S3method(calc_impl,t)
S3method(calc_impl,z)
S3method(fit,infer)
S3method(get_p_value,default)
S3method(get_p_value,infer_dist)
S3method(ggplot_add,infer_layer)
S3method(print,infer)
S3method(print,infer_dist)
S3method(print,infer_layer)
export("%>%")
export(assume)
export(calculate)
export(chisq_stat)
export(chisq_test)
export(conf_int)
export(fit)
export(fit.infer)
export(generate)
export(get_ci)
export(get_confidence_interval)
export(get_p_value)
export(get_pvalue)
export(ggplot_add)
export(hypothesise)
export(hypothesize)
export(observe)
export(p_value)
export(prop_test)
export(rep_sample_n)
export(rep_slice_sample)
export(shade_ci)
export(shade_confidence_interval)
export(shade_p_value)
export(shade_pvalue)
export(specify)
export(t_stat)
export(t_test)
export(visualise)
export(visualize)
importFrom(cli,cli_abort)
importFrom(cli,cli_inform)
importFrom(cli,cli_warn)
importFrom(cli,no)
importFrom(cli,qty)
importFrom(dplyr,across)
importFrom(dplyr,any_of)
importFrom(dplyr,bind_rows)
importFrom(dplyr,group_by)
importFrom(dplyr,n)
importFrom(dplyr,pull)
importFrom(dplyr,select)
importFrom(dplyr,summarize)
importFrom(generics,fit)
importFrom(ggplot2,aes)
importFrom(ggplot2,geom_bar)
importFrom(ggplot2,geom_histogram)
importFrom(ggplot2,geom_rect)
importFrom(ggplot2,geom_vline)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,ggplot_add)
importFrom(ggplot2,labs)
importFrom(glue,glue)
importFrom(glue,glue_collapse)
importFrom(magrittr,"%>%")
importFrom(methods,hasArg)
importFrom(purrr,compact)
importFrom(rlang,"!!")
importFrom(rlang,":=")
importFrom(rlang,caller_env)
importFrom(rlang,enquo)
importFrom(rlang,eval_tidy)
importFrom(rlang,f_lhs)
importFrom(rlang,f_rhs)
importFrom(rlang,get_expr)
importFrom(rlang,new_formula)
importFrom(rlang,quo)
importFrom(rlang,sym)
importFrom(stats,as.formula)
importFrom(stats,dchisq)
importFrom(stats,df)
importFrom(stats,dnorm)
importFrom(stats,dt)
importFrom(stats,qchisq)
importFrom(stats,qf)
importFrom(stats,qnorm)
importFrom(stats,qt)
importFrom(tibble,tibble)


================================================
FILE: NEWS.md
================================================
# infer (development version)

# infer 1.1.0

* Fixed bug where adding `shade_confidence_interval(NULL)` or `shade_p_value(NULL)` 
  to plots resulted in `list()` rather than the unmodified plot (#566).

* Introduced support for arbitrary test statistics in `calculate()`. In addition
  to the pre-implemented `calculate(stat)` options, taken as strings, users can
  now supply a function defining any scalar-valued test statistic. See
  `?calculate()` to learn more (#542).

# infer 1.0.9

* Replaced usage of deprecated functions ahead of a new release of the ggplot2 package (#557).

* Addressed narrative mistakes in the `t_test` vignette (#556).

* Increased the minimum required R version to R 4.1

# infer 1.0.8

* The infer print method now truncates output when descriptions of explanatory or responses variables exceed the console width (#543).

* Added missing commas and addressed formatting issues throughout the vignettes and articles. Backticks for package names were removed and missing parentheses for functions were added (@Joscelinrocha).


# infer 1.0.7

* The aliases `p_value()` and `conf_int()`, first deprecated 6 years ago, now
  return an error (#530).
  
* Addresses ggplot2 warnings when shading p-values for test statistics
  that are outside of the range of the generated distribution (#528).

* Fixed bug in `shade_p_value()` and `shade_confidence_interval()` where `fill = NULL` was ignored when it was documented as preventing any shading (#525).

# infer v1.0.6

* Updated infrastructure for errors, warnings, and messages (#513). Most of these changes will not be visible to users, though:
     - Many longer error messages are now broken up into several lines.
     - For references to help-files, users can now click on the error message's text to navigate to the cited documentation.
     
* Various improvements to documentation (#501, #504, #508, #512).

* Fixed bug where `get_confidence_interval()` would error uninformatively when the supplied distribution of estimates contained missing values. The function will now warn and return a confidence interval calculated using the non-missing estimates (#521).

* Fixed bug where `generate()` could not be used without first `specify()`ing variables, even in cases where that specification would not affect resampling/simulation (#448). 

# infer v1.0.5

* Implemented support for permutation hypothesis tests for paired data via the 
  argument value `null = "paired independence"` in `hypothesize()` (#487).

* The `weight_by` argument to `rep_slice_sample()` can now be passed either as a vector of numeric weights or an unquoted column name in `.data` (#480).

* Newly accommodates variables with spaces in names in the wrapper functions `t_test()` and `prop_test()` (#472).

* Fixed bug in two-sample `prop_test()` where the response and explanatory 
  variable were passed in place of each other to `prop.test()`. This enables
  using `prop_test()` with explanatory variables with greater than 2 levels and,
  in the process, addresses a bug where `prop_test()` collapsed levels other than
  the `success` when the response variable had more than 2 levels.

# infer v1.0.4

* Fixed bug in p-value shading where shaded regions no longer correctly overlaid
  histogram bars.
* Addressed deprecation warning ahead of upcoming dplyr release.

# infer v1.0.3

* Fix R-devel HTML5 NOTEs.

# infer v1.0.2

* Fix p-value shading when the calculated statistic falls exactly on the boundaries of a histogram bin (#424).
* Fix `generate()` errors when columns are named `x` (#431).
* Fix error from `visualize` when passed `generate()`d `infer_dist` objects that had not been passed to `hypothesize()` (#432). 
* Update visual checks for `visualize` output to align with the R 4.1.0+ graphics engine (#438).
* `specify()` and wrapper functions now appropriately handle ordered factors (#439).
* Clarify error when incompatible statistics and hypotheses are supplied (#441).
* Updated `generate()` unexpected `type` warnings to be more permissive—the 
warning will be raised less often when `type = "bootstrap"` (#425).
* Allow passing additional arguments to `stats::chisq.test` via `...` in 
`calculate()`. Ellipses are now always passed to the applicable base R
hypothesis testing function, when applicable (#414)!
* The package will now set the levels of logical variables on conversion to factor
so that the first level (regarded as `success` by default) is `TRUE`. Core verbs
have warned without an explicit `success` value already, and this change makes
behavior consistent with the functions being wrapped by shorthand test 
wrappers (#440).
* Added new statistic `stat = "ratio of means"` (#452).

# infer v1.0.1 (GitHub Only)

This release reflects the infer version accepted to the Journal of Open Source Software.

* Re-licensed the package from CC0 to MIT. See the `LICENSE` and `LICENSE.md` files.
* Contributed a paper to the Journal of Open Source Software, a draft of which is available in `/figs/paper`.
* Various improvements to documentation (#417, #418).

# infer 1.0.0

v1.0.0 is the first major release of the {infer} package! By and large, the core verbs `specify()`, `hypothesize()`, `generate()`, and `calculate()` will interface as they did before. This release makes several improvements to behavioral consistency of the package and introduces support for theory-based inference as well as randomization-based inference with multiple explanatory variables.

## Behavioral consistency

A major change to the package in this release is a set of standards for behavioral consistency of `calculate()` (#356). Namely, the package will now

* supply a consistent error when the supplied `stat` argument isn't well-defined
for the variables `specify()`d

``` r
gss %>%
  specify(response = hours) %>%
  calculate(stat = "diff in means")
#> Error: A difference in means is not well-defined for a 
#> numeric response variable (hours) and no explanatory variable.
```

or

``` r
gss %>%
  specify(college ~ partyid, success = "degree") %>%
  calculate(stat = "diff in props")
#> Error: A difference in proportions is not well-defined for a dichotomous categorical 
#> response variable (college) and a multinomial categorical explanatory variable (partyid).
```

* supply a consistent message when the user supplies unneeded information via `hypothesize()` to `calculate()` an observed statistic

``` r
# supply mu = 40 when it's not needed
gss %>%
  specify(response = hours) %>%
  hypothesize(null = "point", mu = 40) %>%
  calculate(stat = "mean")
#> Message: The point null hypothesis `mu = 40` does not inform calculation of 
#> the observed statistic (a mean) and will be ignored.
#> # A tibble: 1 x 1
#>    stat
#>   <dbl>
#> 1  41.4
```

and

* supply a consistent warning and assume a reasonable null value when the user does not supply sufficient information to calculate an observed statistic

``` r
# don't hypothesize `p` when it's needed
gss %>%
    specify(response = sex, success = "female") %>%
    calculate(stat = "z")
#> # A tibble: 1 x 1
#>    stat
#>   <dbl>
#> 1 -1.16
#> Warning message:
#> A z statistic requires a null hypothesis to calculate the observed statistic. 
#> Output assumes the following null value: `p = .5`. 
```

or

``` r
# don't hypothesize `p` when it's needed
gss %>%
  specify(response = partyid) %>%
  calculate(stat = "Chisq")
#> # A tibble: 1 x 1
#>    stat
#>  <dbl>
#> 1  334.
#> Warning message:
#> A chi-square statistic requires a null hypothesis to calculate the observed statistic. 
#> Output assumes the following null values: `p = c(dem = 0.2, ind = 0.2, rep = 0.2, other = 0.2, DK = 0.2)`.
```

To accommodate this behavior, a number of new `calculate` methods were added or improved. Namely:

- Implemented the standardized proportion $z$ statistic for one categorical variable
- Extended `calculate()` with `stat = "t"` by passing `mu` to the `calculate()` method for `stat = "t"` to allow for calculation of `t` statistics for one numeric variable with hypothesized mean
- Extended `calculate()` to allow lowercase aliases for `stat` arguments (#373).
- Fixed bugs in `calculate()` for to allow for programmatic calculation of statistics

This behavioral consistency also allowed for the implementation of `observe()`, a wrapper function around `specify()`, `hypothesize()`, and `calculate()`, to calculate observed statistics. The function provides a shorthand alternative to calculating observed statistics from data:

``` r
# calculating the observed mean number of hours worked per week
gss %>%
  observe(hours ~ NULL, stat = "mean")
#> # A tibble: 1 x 1
#>    stat
#>   <dbl>
#> 1  41.4

# equivalently, calculating the same statistic with the core verbs
gss %>%
  specify(response = hours) %>%
  calculate(stat = "mean")
#> # A tibble: 1 x 1
#>    stat
#>   <dbl>
#> 1  41.4

# calculating a t statistic for hypothesized mu = 40 hours worked/week
gss %>%
  observe(hours ~ NULL, stat = "t", null = "point", mu = 40)
#> # A tibble: 1 x 1
#>    stat
#>   <dbl>
#> 1  2.09

# equivalently, calculating the same statistic with the core verbs
gss %>%
  specify(response = hours) %>%
  hypothesize(null = "point", mu = 40) %>%
  calculate(stat = "t")
#> # A tibble: 1 x 1
#>    stat
#>   <dbl>
#> 1  2.09
```

We don't anticipate that these changes are "breaking" in the sense that code that previously worked will continue to, though it may now message or warn in a way that it did not used to or error with a different (and hopefully more informative) message.

## A framework for theoretical inference

This release also introduces a more complete and principled interface for theoretical inference. While the package previously supplied some methods for visualization of theory-based curves, the interface did not provide any object that was explicitly a "null distribution" that could be supplied to helper functions like `get_p_value()` and `get_confidence_interval()`. The new interface is based on a new verb, `assume()`, that returns a null distribution that can be interfaced in the same way that simulation-based null distributions can be interfaced with.

As an example, we'll work through a full infer pipeline for inference on a mean using infer's `gss` dataset. Supposed that we believe the true mean number of hours worked by Americans in the past week is 40.

First, calculating the observed `t`-statistic:

``` r
obs_stat <- gss %>%
  specify(response = hours) %>%
  hypothesize(null = "point", mu = 40) %>%
  calculate(stat = "t")

obs_stat
#> Response: hours (numeric)
#> Null Hypothesis: point
#> # A tibble: 1 x 1
#>    stat
#>   <dbl>
#> 1  2.09
```

The code to define the null distribution is very similar to that required to calculate a theorized observed statistic, switching out `calculate()` for `assume()` and replacing arguments as needed.

``` r
null_dist <- gss %>%
  specify(response = hours) %>%
  assume(distribution = "t")

null_dist 
#> A T distribution with 499 degrees of freedom.
```

This null distribution can now be interfaced with in the same way as a simulation-based null distribution elsewhere in the package. For example, calculating a p-value by juxtaposing the observed statistic and null distribution:

``` r
get_p_value(null_dist, obs_stat, direction = "both")
#> # A tibble: 1 x 1
#>   p_value
#>     <dbl>
#> 1  0.0376
```

…or visualizing the null distribution alone:

``` r
visualize(null_dist)
```

![](https://i.imgur.com/g3B5coD.png)

…or juxtaposing the two visually:

``` r
visualize(null_dist) + 
  shade_p_value(obs_stat, direction = "both")
```

![](https://i.imgur.com/3C66kgK.png)

Confidence intervals lie in data space rather than the standardized scale of the theoretical distributions. Calculating a mean rather than the standardized `t`-statistic:

``` r
obs_mean <- gss %>%
  specify(response = hours) %>%
  calculate(stat = "mean")
```

The null distribution here just defines the spread for the standard error calculation.

``` r
ci <- 
  get_confidence_interval(
    null_dist,
    level = .95,
    point_estimate = obs_mean
  )

ci
#> # A tibble: 1 x 2
#>   lower_ci upper_ci
#>      <dbl>    <dbl>
#> 1     40.1     42.7
```

Visualizing the confidence interval results in the theoretical distribution being recentered and rescaled to align with the scale of the observed data:

``` r
visualize(null_dist) + 
  shade_confidence_interval(ci)
```

![](https://i.imgur.com/4akSCY3.png)

Previous methods for interfacing with theoretical distributions are superseded—they will continue to be supported, though documentation will forefront the `assume()` interface.

## Support for multiple regression

The 2016 "Guidelines for Assessment and Instruction in Statistics Education" [1] state that, in introductory statistics courses, "[s]tudents should gain experience with how statistical models, including multivariable models, are used." In line with this recommendation, we introduce support for randomization-based inference with multiple explanatory variables via a new `fit.infer` core verb.

If passed an `infer` object, the method will parse a formula out of the `formula` or `response` and `explanatory` arguments, and pass both it and `data` to a `stats::glm` call.

``` r
gss %>%
  specify(hours ~ age + college) %>%
  fit()
#> # A tibble: 3 x 2
#>   term          estimate
#>   <chr>            <dbl>
#> 1 intercept     40.6    
#> 2 age            0.00596
#> 3 collegedegree  1.53
```

Note that the function returns the model coefficients as `estimate` rather than their associated `t`-statistics as `stat`.

If passed a `generate()`d object, the model will be fitted to each replicate.

``` r
gss %>%
  specify(hours ~ age + college) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  fit()
#> # A tibble: 300 x 3
#> # Groups:   replicate [100]
#>    replicate term          estimate
#>        <int> <chr>            <dbl>
#>  1         1 intercept     44.4    
#>  2         1 age           -0.0767 
#>  3         1 collegedegree  0.121  
#>  4         2 intercept     41.8    
#>  5         2 age            0.00344
#>  6         2 collegedegree -1.59   
#>  7         3 intercept     38.3    
#>  8         3 age            0.0761 
#>  9         3 collegedegree  0.136  
#> 10         4 intercept     43.1    
#> # … with 290 more rows
```

If `type = "permute"`, a set of unquoted column names in the data to permute (independently of each other) can be passed via the `variables` argument to `generate`. It defaults to only the response variable.

``` r
gss %>%
  specify(hours ~ age + college) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute", variables = c(age, college)) %>%
  fit()
#> # A tibble: 300 x 3
#> # Groups:   replicate [100]
#>    replicate term          estimate
#>        <int> <chr>            <dbl>
#>  1         1 intercept      39.4   
#>  2         1 age             0.0748
#>  3         1 collegedegree  -2.98  
#>  4         2 intercept      42.8   
#>  5         2 age            -0.0190
#>  6         2 collegedegree  -1.83  
#>  7         3 intercept      40.4   
#>  8         3 age             0.0354
#>  9         3 collegedegree  -1.31  
#> 10         4 intercept      40.9   
#> # … with 290 more rows
```

This feature allows for more detailed exploration of the effect of disrupting the correlation structure among explanatory variables on outputted model coefficients.

Each of the auxillary functions `get_p_value()`, `get_confidence_interval()`, `visualize()`, `shade_p_value()`, and `shade_confidence_interval()` have methods to handle `fit()` output! See their help-files for example usage. Note that `shade_*` functions now delay evaluation until they are added to an existing ggplot (e.g. that outputted by `visualize()`) with `+`.

## Improvements

- Following extensive discussion, the `generate()` type `type = "simulate"` has been renamed to the more evocative `type = "draw"`. We will continue to support `type = "simulate"` indefinitely, though supplying that argument will now prompt a message notifying the user of its preferred alias. (#233, #390)
- Fixed several bugs related to factors with unused levels. `specify()` will now drop unused factor levels and message that it has done so. (#374, #375, #397, #380)
- Added `two.sided` as an acceptable alias for `two_sided` for the `direction` argument in `get_p_value()` and `shade_p_value()`. (#355)
- Various improvements to documentation, including extending example sections in help-files, re-organizing the function reference in the {pkgdown} site, and linking more extensively among help-files.

## Breaking changes

We don't anticipate that any changes made in this release are "breaking" in the sense that code that previously worked will continue to, though it may now message or warn in a way that it did not used to or error with a different (and hopefully more informative) message. If you currently teach or research with infer, we recommend re-running your materials and noting any changes in messaging and warning.

- Move forward with a number of planned deprecations. Namely, the `GENERATION_TYPES` object is now fully deprecated, and arguments that were relocated from `visualize()` to `shade_p_value()` and `shade_confidence_interval()` are now fully deprecated in `visualize()`. If supplied a deprecated argument, `visualize()` will warn the user and ignore the argument.
- Added a `prop` argument to `rep_slice_sample()` as an alternative to the `n`
argument for specifying the proportion of rows in the supplied data to sample
per replicate (#361, #362, #363). This changes order of arguments of
`rep_slice_sample()` (in order to be more aligned with `dplyr::slice_sample()`)
which might break code if it didn't use named arguments (like
`rep_slice_sample(df, 5, TRUE)`). To fix this, use named arguments (like
`rep_slice_sample(df, 5, replicate = TRUE)`).

## Other

- Added Simon P. Couch as an author. Long deserved for his reliable maintenance and improvements of the package.

[1]: GAISE College Report ASA Revision Committee, "Guidelines for Assessment and Instruction in Statistics Education College Report 2016," http://www.amstat.org/education/gaise. 

# infer 0.5.4

- `rep_sample_n()` no longer errors when supplied a `prob` argument (#279)
- Added `rep_slice_sample()`, a light wrapper around `rep_sample_n()`, that
more closely resembles `dplyr::slice_sample()` (the function that supersedes
`dplyr::sample_n()`) (#325)
- Added a `success`, `correct`, and `z` argument to `prop_test()` 
(#343, #347, #353)
- Implemented observed statistic calculation for the standardized proportion 
$z$ statistic (#351, #353)
- Various bug fixes and improvements to documentation and errors.

# infer 0.5.3

## Breaking changes

- `get_confidence_interval()` now uses column names ('lower_ci' and 'upper_ci') 
in output that are consistent with other infer functionality (#317).

## New functionality

- `get_confidence_interval()` can now produce bias-corrected confidence intervals
by setting `type = "bias-corrected"`. Thanks to @davidbaniadam for the 
initial implementation (#237, #318)!

## Other

- Fix CRAN check failures related to long double errors.

# infer 0.5.2

- Warn the user when a p-value of 0 is reported (#257, #273)
- Added new vignettes: `chi_squared` and `anova` (#268)
- Updates to documentation and existing vignettes (#268)
- Add alias for `hypothesize()` (`hypothesise()`) (#271)
- Subtraction order no longer required for difference-based tests--a warning will be raised in the case that the user doesn't supply an `order` argument (#275, #281)
- Add new messages for common errors (#277)
- Increase coverage of theoretical methods in documentation (#278, #280)
- Drop missing values and reduce size of `gss` dataset used in examples (#282)
- Add `stat = "ratio of props"` and `stat = "odds ratio"` to `calculate` (#285)
- Add `prop_test()`, a tidy interface to `prop.test()` (#284, #287)
- Updates to `visualize()` for compatibility with `ggplot2` v3.3.0 (#289)
- Fix error when bootstrapping with small samples and raise warnings/errors 
when appropriate (#239, #244, #291)
- Fix unit test failures resulting from breaking changes in `dplyr` v1.0.0
- Fix error in `generate()` when response variable is named `x` (#299)
- Add `two-sided` and `two sided` as aliases for `two_sided` for the 
`direction` argument in `get_p_value()` and `shade_p_value()` (#302)
- Fix `t_test()` and `t_stat()` ignoring the `order` argument (#310)

# infer 0.5.1

- Updates to documentation and other tweaks

# infer 0.5.0

## Breaking changes

- `shade_confidence_interval()` now plots vertical lines starting from zero (previously - from the bottom of a plot) (#234).
- `shade_p_value()` now uses "area under the curve" approach to shading (#229).

## Other

- Updated `chisq_test()` to take arguments in a response/explanatory format, perform goodness of fit tests, and default to the approximation approach (#241).
- Updated `chisq_stat()` to do goodness of fit (#241).
- Make interface to `hypothesize()` clearer by adding the options for the point null parameters to the function signature (#242).
- Manage `infer` class more systematically (#219).
- Use `vdiffr` for plot testing (#221).

# infer 0.4.1

- Added Evgeni Chasnovski as author for his incredible work on refactoring the package and providing excellent support.

# infer 0.4.0

## Breaking changes

- Changed method of computing two-sided p-value to a more conventional one. It also makes `get_pvalue()` and `visualize()` more aligned (#205).

## Deprecation changes

- Deprecated `p_value()` (use `get_p_value()` instead) (#180).
- Deprecated `conf_int()` (use `get_confidence_interval()` instead) (#180).
- Deprecated (via warnings) plotting p-value and confidence interval in `visualize()` (use new functions `shade_p_value()` and `shade_confidence_interval()` instead) (#178).

## New functions

- `shade_p_value()` - {ggplot2}-like layer function to add information about p-value region to `visualize()` output. Has alias `shade_pvalue()`.
- `shade_confidence_interval()` - {ggplot2}-like layer function to add information about confidence interval region to `visualize()` output. Has alias `shade_ci()`.

## Other

- Account for `NULL` value in left hand side of formula in `specify()` (#156) and `type` in `generate()` (#157).
- Update documentation code to follow tidyverse style guide (#159).
- Remove help page for internal `set_params()` (#165).
- Fully use {tibble} (#166).
- Fix `calculate()` to not depend on order of `p` for `type = "simulate"` (#122).
- Reduce code duplication (#173).
- Make transparency in `visualize()` to not depend on method and data volume.
- Make `visualize()` work for "One sample t" theoretical type with `method = "both"`.
- Add `stat = "sum"` and `stat = "count"` options to `calculate()` (#50).

# infer 0.3.1

- Stop using package {assertive} in favor of custom type checks (#149)
- Fixed `t_stat()` to use `...` so `var.equal` works
- With the help of @echasnovski, fixed `var.equal = TRUE` for `specify() %>% calculate(stat = "t")`
- Use custom functions for error, warning, message, and `paste()` handling (#155)

# infer 0.3.0

- Added `conf_int` logical argument and `conf_level` argument to `t_test()`
- Switched `shade_color` argument in `visualize()` to be `pvalue_fill` instead
since fill color for confidence intervals is also added now
- Shading for Confidence Intervals in `visualize()` 
    - Green is default color for CI and red for p-values
    - `direction = "between"` to get the green shading
    - Currently working only for simulation-based methods
- Implemented `conf_int()` function for computing confidence interval provided a simulation-based method with a `stat` variable
    - `get_ci()` and `get_confidence_interval()` are aliases for `conf_int()`
    - Converted longer confidence interval calculation code in vignettes to use `get_ci()` instead    
- Implemented `p_value()` function for computing p-value provided a simulation-based method with a `stat` variable
    - `get_pvalue()` is an alias for `p_value()`
    - Converted longer p-value calculation code in vignettes to use `get_pvalue()` instead
- Implemented Chi-square Goodness of Fit observed stat depending on `params` being set in `hypothesize` with `specify() %>% calculate()` shortcut
- Removed "standardized" slope $t$ since its formula is different than "standardized" correlation and there is no way currently to give one over the other
- Implemented correlation with bootstrap CI and permutation hypothesis test
- Filled the `type` argument automatically in `generate()` based
on `specify()` and `hypothesize()`
    - Added message if `type` is given differently than expected
- Implemented `specify() %>% calculate()` for getting observed
statistics.
    - `visualize()` works with either a 1x1 data frame or a vector
    for its `obs_stat` argument
    - Got `stat = "t"` working
- Refactored `calculate()` into smaller functions to reduce complexity
- Produced error if `mu` is given in `hypothesize()` but `stat = "median"`
is provided in `calculate()` and other similar mis-specifications
- Tweaked `chisq_stat()` and `t_stat()` to match with `specify() %>% calculate()` framework
    - Both work in the one sample and two sample cases by providing `formula`
    - Added `order` argument to `t_stat()`
- Added implementation of one sample `t_test()` by passing in the `mu` argument to `t.test`
from `hypothesize()`
- Tweaked `pkgdown` page to include ToDo's using [{dplyr}](https://github.com/tidyverse/dplyr/blob/master/_pkgdown.yml) example

# infer 0.2.0

- Switched to `!!` instead of `UQ()` since `UQ()` is deprecated in 
{rlang} 0.2.0
- Added many new files: `CONDUCT.md`, `CONTRIBUTING.md`, and `TO-DO.md`
- Updated README file with more development information
- Added wrapper functions `t_test()` and `chisq_test()` that use a
formula interface and provide an intuitive wrapper to `t.test()` and
`chisq.test()`
- Created `stat = "z"` and `stat = "t"` options
- Added many new arguments to `visualize()` to prescribe colors to shade and 
use for observed statistics and theoretical density curves
- Added check so that a bar graph created with `visualize()` if number of 
unique values for generated statistics is small
- Added shading for `method = "theoretical"` 
- Implemented shading for simulation methods w/o a traditional distribution
    - Use percentiles to determine two-tailed shading
- Changed `method = "randomization"` to `method = "simulation"`
- Added warning when theoretical distribution is used that 
  assumptions should be checked  
- Added theoretical distributions to `visualize()` alone and as overlay with
current implementations being
    - Two sample t
    - ANOVA F
    - One proportion z
    - Two proportion z
    - Chi-square test of independence
    - Chi-square Goodness of Fit test
    - Standardized slope (t)
    
# infer 0.1.1
- Added additional tests
- Added `order` argument in `calculate()`
- Fixed bugs post-CRAN release
- Automated travis build of pkgdown to gh-pages branch

# infer 0.1.0
- Altered the way that successes are indicated in an infer pipeline. 
They now live in `specify()`.
- Updated documentation with examples
- Created `pkgdown` site materials
    - Deployed to https://infer.tidymodels.org/


# infer 0.0.1
- Implemented the "intro stats" examples for randomization methods


================================================
FILE: R/assume.R
================================================
#' Define a theoretical distribution
#'
#' @description
#'
#' This function allows the user to define a null distribution based on
#' theoretical methods. In many infer pipelines, `assume()` can be
#' used in place of [generate()] and [calculate()] to create a null
#' distribution. Rather than outputting a data frame containing a
#' distribution of test statistics calculated from resamples of the observed
#' data, `assume()` outputs a more abstract type of object just containing
#' the distributional details supplied in the `distribution` and `df` arguments.
#' However, `assume()` output can be passed to [visualize()], [get_p_value()],
#' and [get_confidence_interval()] in the same way that simulation-based
#' distributions can.
#'
#' To define a theoretical null distribution (for use in hypothesis testing),
#' be sure to provide a null hypothesis via [hypothesize()]. To define a
#' theoretical sampling distribution (for use in confidence intervals),
#' provide the output of [specify()]. Sampling distributions (only
#' implemented for `t` and `z`) lie on the scale of the data, and will be
#' recentered and rescaled to match the corresponding `stat` given in
#' [calculate()] to calculate the observed statistic.
#'
#' @param x The output of [specify()] or [hypothesize()], giving the
#'   observed data, variable(s) of interest, and (optionally) null hypothesis.
#' @param distribution The distribution in question, as a string. One of
#'   `"F"`, `"Chisq"`, `"t"`, or `"z"`.
#' @param df Optional. The degrees of freedom parameter(s) for the `distribution`
#'   supplied, as a numeric vector. For `distribution = "F"`, this should have
#'   length two (e.g. `c(10, 3)`). For `distribution = "Chisq"` or
#'   `distribution = "t"`, this should have length one. For
#'   `distribution = "z"`, this argument is not required. The package
#'   will supply a message if the supplied `df` argument is different from
#'   recognized values. See the Details section below for more information.
#' @param ... Currently ignored.
#'
#' @return An infer theoretical distribution that can be passed to helpers
#'   like [visualize()], [get_p_value()], and [get_confidence_interval()].
#'
#' @details
#'
#' Note that the assumption being expressed here, for use in theory-based
#' inference, only extends to _distributional_ assumptions: the null
#' distribution in question and its parameters. Statistical inference with
#' infer, whether carried out via simulation (i.e. based on pipelines
#' using [generate()] and [calculate()]) or theory (i.e. with `assume()`),
#' always involves the condition that observations are independent of
#' each other.
#'
#' `infer` only supports theoretical tests on one or two means via the
#' `t` distribution and one or two proportions via the `z`.
#'
#' For tests comparing two means, if `n1` is the group size for one level of
#' the explanatory variable, and `n2` is that for the other level, `infer`
#' will recognize the following degrees of freedom (`df`) arguments:
#'
#' * `min(n1 - 1, n2 - 1)`
#' * `n1 + n2 - 2`
#' * The `"parameter"` entry of the analogous `stats::t.test()` call
#' * The `"parameter"` entry of the analogous `stats::t.test()` call with `var.equal = TRUE`
#'
#' By default, the package will use the `"parameter"` entry of the analogous
#' `stats::t.test()` call with `var.equal = FALSE` (the default).
#'
#' @examples
#' # construct theoretical distributions ---------------------------------
#'
#' # F distribution
#' # with the `partyid` explanatory variable
#' gss |>
#'   specify(age ~ partyid) |>
#'   assume(distribution = "F")
#'
#' # Chi-squared goodness of fit distribution
#' # on the `finrela` variable
#' gss |>
#'   specify(response = finrela) |>
#'   hypothesize(null = "point",
#'               p = c("far below average" = 1/6,
#'                     "below average" = 1/6,
#'                     "average" = 1/6,
#'                     "above average" = 1/6,
#'                     "far above average" = 1/6,
#'                     "DK" = 1/6)) |>
#'   assume("Chisq")
#'
#' # Chi-squared test of independence
#' # on the `finrela` and `sex` variables
#' gss |>
#'   specify(formula = finrela ~ sex) |>
#'   assume(distribution = "Chisq")
#'
#' # T distribution
#' gss |>
#'   specify(age ~ college) |>
#'   assume("t")
#'
#' # Z distribution
#' gss |>
#'   specify(response = sex, success = "female") |>
#'   assume("z")
#'
#' \dontrun{
#' # each of these distributions can be passed to infer helper
#' # functions alongside observed statistics!
#'
#' # for example, a 1-sample t-test -------------------------------------
#'
#' # calculate the observed statistic
#' obs_stat <- gss |>
#'   specify(response = hours) |>
#'   hypothesize(null = "point", mu = 40) |>
#'   calculate(stat = "t")
#'
#' # construct a null distribution
#' null_dist <- gss |>
#'   specify(response = hours) |>
#'   assume("t")
#'
#' # juxtapose them visually
#' visualize(null_dist) +
#'   shade_p_value(obs_stat, direction = "both")
#'
#' # calculate a p-value
#' get_p_value(null_dist, obs_stat, direction = "both")
#'
#' # or, an F test ------------------------------------------------------
#'
#' # calculate the observed statistic
#' obs_stat <- gss |>
#'   specify(age ~ partyid) |>
#'   hypothesize(null = "independence") |>
#'   calculate(stat = "F")
#'
#' # construct a null distribution
#' null_dist <- gss |>
#'   specify(age ~ partyid) |>
#'   assume(distribution = "F")
#'
#' # juxtapose them visually
#' visualize(null_dist) +
#'   shade_p_value(obs_stat, direction = "both")
#'
#' # calculate a p-value
#' get_p_value(null_dist, obs_stat, direction = "both")
#' }
#'
#' @export
assume <- function(x, distribution, df = NULL, ...) {
  if (!inherits(x, "infer")) {
    cli_abort(
      "The {.arg x} argument must be the output of a core infer function, \\
       likely {.fun specify} or {.fun hypothesize}."
    )
  }

  # check that `distribution` aligns with what is expected from
  # `x` and that `distribution` and `df` are consistent with each other
  df <- check_distribution(x, distribution, df, ...)

  structure(
    glue(
      "{distribution_desc(distribution)} distribution{df_desc(df)}.",
      .null = "NULL"
    ),
    # store distribution as the suffix to p* in dist function
    distribution = dist_fn(distribution),
    dist_ = distribution,
    # store df for easier passing to p* functions
    df = df,
    # store df in `specify`-esque format for use in `visualize`
    distr_param = if (length(df) > 0) {
      df[1]
    } else {
      NULL
    },
    distr_param2 = if (length(df) == 2) {
      df[2]
    } else {
      NULL
    },
    # bring along x attributes
    theory_type = attr(x, "theory_type"),
    params = attr(x, "params"),
    hypothesized = attr(x, "hypothesized"),
    # bring along dots
    dots = list(...),
    # append class
    class = "infer_dist"
  )
}

# check that the distribution is well-specified
check_distribution <- function(x, distribution, df, ..., call = caller_env()) {
  dist <- tolower(distribution)

  if (!dist %in% c("f", "chisq", "t", "z")) {
    cli_abort(
      'The distribution argument must be one of "Chisq", "F", "t", or "z".',
      call = call
    )
  }

  if (
    (dist == "f" && attr(x, "theory_type") != "ANOVA") ||
      (dist == "chisq" &&
        !attr(x, "theory_type") %in%
          c("Chi-square test of indep", "Chi-square Goodness of Fit")) ||
      (dist == "t" &&
        !attr(x, "theory_type") %in% c("One sample t", "Two sample t")) ||
      (dist == "z" &&
        !attr(x, "theory_type") %in%
          c("One sample prop z", "Two sample props z"))
  ) {
    if (has_explanatory(x)) {
      msg_tail <- glue(
        "a {get_stat_type_desc(attr(x, 'type_desc_explanatory'))} ",
        "explanatory variable ({explanatory_name(x)}).",
        .null = "NULL"
      )
    } else {
      msg_tail <- "no explanatory variable."
    }

    cli_abort(
      'The supplied distribution {.val {distribution}} is not well-defined for a \\
      {get_stat_type_desc(attr(x, "type_desc_response"))} response \\
      variable ({response_name(x)}) and {msg_tail}',
      call = call
    )
  }

  if (!is.numeric(df) && !is.null(df)) {
    cli_abort(
      "{.fun assume} expects the {.arg df} argument to be a numeric vector, \\
       but you supplied a {list(class(df))} object.",
      call = call
    )
  }

  if (length(list(...)) != 0) {
    dots <- list(...)

    cli_abort(
      c(
        "{.fun assume} ignores the dots `...` argument, though the \\
       {qty(dots)}argument{?s} {.field {names(dots)}} {?was/were} supplied. ",
        i = "Did you forget to concatenate the {.arg df} argument with {.fun c}?"
      ),
      call = call
    )
  }

  if (dist_df_length(distribution) != length(df) && !is.null(df)) {
    cli_abort(
      '{distribution_desc(distribution)} distribution requires \\
       {dist_df_length(distribution)} degrees of freedom argument{?s}, \\
       but {length(df)} {?was/were} supplied.',
      call = call
    )
  }

  df <- determine_df(x, dist, df)

  return(df)
}

# convert the distribution argument to its r distribution function suffix
dist_fn <- function(distribution) {
  switch(
    tolower(distribution),
    `f` = "f",
    `chisq` = "chisq",
    `t` = "t",
    `z` = "norm"
  )
}

# return expected degrees of freedom length
dist_df_length <- function(distribution) {
  switch(
    tolower(distribution),
    `f` = 2,
    `chisq` = ,
    `t` = 1,
    `z` = 0
  )
}

# describe the distribution
distribution_desc <- function(distribution) {
  switch(
    tolower(distribution),
    `f` = "An F",
    `chisq` = "A Chi-squared",
    `t` = "A T",
    `z` = "A Z"
  )
}

# describe the degrees of freedom
df_desc <- function(df) {
  if (is.null(df)) {
    ""
  } else {
    plural <- length(df) != 1

    paste0(
      ' with ',
      if (plural) {
        paste0(round(df), collapse = " and ")
      } else {
        round(df)
      },
      ' degree',
      if (!plural && df == 1) {
        ''
      } else {
        's'
      },
      ' of freedom'
    )
  }
}

# process df for passing to p* functions
process_df <- function(df) {
  switch(
    as.character(length(df)),
    "0" = list(),
    "1" = list(df = df),
    "2" = list(df1 = df[1], df2 = df[2])
  )
}

# generate an automatic "df" value using logic from
# hypothesize and, if it doesn't match the
# supplied one, raise a message
determine_df <- function(x, dist, df) {
  if (!is.null(df) && !all(round(df) %in% round(acceptable_dfs(x)))) {
    cli_inform(
      "Message: The supplied {.arg df} argument does not match its \\
       expected value. If this is unexpected, ensure that your calculation \\
       for {.arg df} is correct (see {.help [{.fun assume}](infer::assume)} for \\
       recognized values) or supply {.code df = NULL} to {.fun assume}."
    )

    return(df)
  }

  if (is.null(df)) {
    df <- acceptable_dfs(x)
  }

  if (attr(x, "theory_type") == "Two sample t") {
    df <- df[1]
  }

  df
}

# return a vector of dfs recognized by `assume`
acceptable_dfs <- function(x) {
  # base R pipe doesn't support operators or anonymous functions
  # in piped expressions (#553)
  minus_one <- function(x) {x - 1}
  minus_two <- function(x) {x - 2}

  if (attr(x, "theory_type") == "Two sample t") {
    c(
      # t.test param with var.equal = FALSE
      unname(
        unlist(
          attr(x, "distr_param") <-
            stats::t.test(response_variable(x) ~ explanatory_variable(x))[[
              "parameter"
            ]]
        )
      ),
      # t.test param with var.equal = TRUE
      unname(
        unlist(
          attr(x, "distr_param") <-
            stats::t.test(
              response_variable(x) ~ explanatory_variable(x),
              var.equal = TRUE
            )[["parameter"]]
        )
      ),
      # min(n1 - 1, n2 - 1)
      x |>
        dplyr::count(!!explanatory_expr(x)) |>
        dplyr::pull(n) |>
        min() |>
        minus_one(),
      # n1 + n2 - 2
      x |>
        dplyr::count(!!explanatory_expr(x)) |>
        dplyr::pull(n) |>
        sum() |>
        minus_two()
    )
  } else {
    c(
      unname(unlist(attr(x, "distr_param"))),
      unname(unlist(attr(x, "distr_param2")))
    )
  }
}


================================================
FILE: R/calculate.R
================================================
#' Calculate summary statistics
#'
#' @description
#'
#' Given the output of [specify()] and/or [hypothesize()], this function will
#' return the observed statistic specified with the `stat` argument. Some test
#' statistics, such as `Chisq`, `t`, and `z`, require a null hypothesis. If
#' provided the output of [generate()], the function will calculate the
#' supplied `stat` for each `replicate`.
#'
#' Learn more in `vignette("infer")`.
#'
#' @param x The output from [generate()] for computation-based inference or the
#'   output from [hypothesize()] piped in to here for theory-based inference.
#' @param stat A string giving the type of the statistic to calculate or a
#'   function that takes in a replicate of `x` and returns a scalar value. Current
#'   options include `"mean"`, `"median"`, `"sum"`, `"sd"`, `"prop"`, `"count"`,
#'   `"diff in means"`, `"diff in medians"`, `"diff in props"`, `"Chisq"` (or
#'   `"chisq"`), `"F"` (or `"f"`), `"t"`, `"z"`, `"ratio of props"`, `"slope"`,
#'   `"odds ratio"`, `"ratio of means"`, and `"correlation"`. `infer` only
#'   supports theoretical tests on one or two means via the `"t"` distribution
#'   and one or two proportions via the `"z"`. See the "Arbitrary test statistics"
#'   section below for more on how to define a custom statistic.
#' @param order A string vector of specifying the order in which the levels of
#'   the explanatory variable should be ordered for subtraction (or division
#'   for ratio-based statistics), where `order = c("first", "second")` means
#'   `("first" - "second")`, or the analogue for ratios. Needed for inference on
#'   difference in means, medians, proportions, ratios, t, and z statistics.
#' @param ... To pass options like `na.rm = TRUE` into functions like
#'   [mean()][base::mean()], [sd()][stats::sd()], etc. Can also be used to
#'   supply hypothesized null values for the `"t"` statistic or additional
#'   arguments to [stats::chisq.test()].
#'
#' @return A tibble containing a `stat` column of calculated statistics.
#'
#' @section Arbitrary test statistics:
#'
#' In addition to the pre-implemented statistics documented in `stat`, users can
#' supply an arbitrary test statistic by supplying a function to the `stat`
#' argument.
#'
#' The function should have arguments `stat(x, order, ...)`, where `x` is one
#' replicate's worth of `x`. The `order` argument and ellipses will be supplied
#' directly to the `stat` function. Internally, `calculate()` will split `x` up
#' into data frames by replicate and pass them one-by-one to the supplied `stat`.
#' For example, to implement `stat = "mean"` as a function, one could write:
#'
#' ```r
#' stat_mean <- function(x, order, ...) {mean(x$hours)}
#' obs_mean <-
#'   gss %>%
#'   specify(response = hours) %>%
#'   calculate(stat = stat_mean)
#'
#' set.seed(1)
#' null_dist_mean <-
#'   gss %>%
#'   specify(response = hours) %>%
#'   hypothesize(null = "point", mu = 40) %>%
#'   generate(reps = 5, type = "bootstrap") %>%
#'   calculate(stat = stat_mean)
#' ```
#'
#' Note that the same `stat_mean` function is supplied to both `generate()`d and
#' non-`generate()`d infer objects--no need to implement support for grouping
#' by `replicate` yourself.
#'
#' @section Missing levels in small samples:
#' In some cases, when bootstrapping with small samples, some generated
#' bootstrap samples will have only one level of the explanatory variable
#' present. For some test statistics, the calculated statistic in these
#' cases will be NaN. The package will omit non-finite values from
#' visualizations (with a warning) and raise an error in p-value calculations.
#'
#' @includeRmd man-roxygen/seeds.Rmd
#'
#' @examples
#'
#' # calculate a null distribution of hours worked per week under
#' # the null hypothesis that the mean is 40
#' gss |>
#'   specify(response = hours) |>
#'   hypothesize(null = "point", mu = 40) |>
#'   generate(reps = 200, type = "bootstrap") |>
#'   calculate(stat = "mean")
#'
#' # calculate the corresponding observed statistic
#' gss |>
#'   specify(response = hours) |>
#'   calculate(stat = "mean")
#'
#' # calculate a null distribution assuming independence between age
#' # of respondent and whether they have a college degree
#' gss |>
#'   specify(age ~ college) |>
#'   hypothesize(null = "independence") |>
#'   generate(reps = 200, type = "permute") |>
#'   calculate("diff in means", order = c("degree", "no degree"))
#'
#' # calculate the corresponding observed statistic
#' gss |>
#'   specify(age ~ college) |>
#'   calculate("diff in means", order = c("degree", "no degree"))
#'
#' # some statistics require a null hypothesis
#'  gss |>
#'    specify(response = hours) |>
#'    hypothesize(null = "point", mu = 40) |>
#'    calculate(stat = "t")
#'
#' # more in-depth explanation of how to use the infer package
#' \dontrun{
#' vignette("infer")
#' }
#'
#' @seealso [visualize()], [get_p_value()], and [get_confidence_interval()]
#' to extract value from this function's outputs.
#'
#' @importFrom dplyr group_by summarize n
#' @importFrom rlang !! sym quo enquo eval_tidy
#' @family core functions
#' @export
calculate <- function(
  x,
  stat = c(
    "mean",
    "median",
    "sum",
    "sd",
    "prop",
    "count",
    "diff in means",
    "diff in medians",
    "diff in props",
    "Chisq",
    "F",
    "slope",
    "correlation",
    "t",
    "z",
    "ratio of props",
    "odds ratio",
    "ratio of means"
  ),
  order = NULL,
  ...
) {
  check_type(x, tibble::is_tibble)
  check_if_mlr(x, "calculate")
  stat_chr <- stat_chr(stat)
  stat_chr <- check_calculate_stat(stat_chr)
  check_input_vs_stat(x, stat_chr)
  check_point_params(x, stat_chr)

  order <- check_order(x, order, in_calculate = TRUE, stat_chr)

  if (!is_generated(x)) {
    x$replicate <- 1L
  }

  x <- message_on_excessive_null(x, stat = stat_chr, fn = "calculate")
  x <- warn_on_insufficient_null(x, stat_chr, ...)

  # Use S3 method to match correct calculation
  result <- calc_impl(
    structure(stat, class = gsub(" ", "_", stat_chr)),
    x,
    order,
    ...
  )

  result <- copy_attrs(to = result, from = x)
  attr(result, "stat") <- stat

  # For returning a 1x1 observed statistic value
  if (nrow(result) == 1) {
    result <- select(result, stat)
  }

  append_infer_class(result)
}

check_if_mlr <- function(x, fn, call = caller_env()) {
  if (fn == "calculate") {
    suggestion <-
      "When working with multiple explanatory variables, use \\
        {.help [{.fun fit}](infer::fit.infer)} instead."
  } else {
    suggestion <- ""
  }

  if (is_mlr(x)) {
    cli_abort(
      c(
        "Multiple explanatory variables are not supported in {.fun {fn}}.",
        i = suggestion
      ),
      call = call
    )
  }
}

stat_chr <- function(stat) {
  if (rlang::is_function(stat)) {
    return("function")
  }

  stat
}

check_calculate_stat <- function(stat, call = caller_env()) {
  check_type(stat, rlang::is_string, call = call)
  if (identical(stat, "function")) {
    return(stat)
  }

  # Check for possible `stat` aliases
  alias_match_id <- match(stat, implemented_stats_aliases[["alias"]])
  if (!is.na(alias_match_id)) {
    stat <- implemented_stats_aliases[["target"]][alias_match_id]
  } else {
    rlang::arg_match(stat, implemented_stats)
  }

  stat
}

# Raise an error if the user supplies a test statistic that doesn't
# make sense given the variable and hypothesis specified
check_input_vs_stat <- function(x, stat, call = caller_env()) {
  response_type <- attr(x, "type_desc_response")
  explanatory_type <- attr(x, "type_desc_explanatory")

  possible_stats <- stat_types |>
    dplyr::filter(resp == response_type & exp == explanatory_type) |>
    dplyr::pull(stats) |>
    unlist()

  if (is.null(possible_stats)) {
    cli_abort(
      "The infer team has not implemented test statistics for the \\
       supplied variable types.",
      call = call
    )
  }

  if (identical(stat, "function")) {
     return(x)
  }

  if (!stat %in% possible_stats) {
    if (has_explanatory(x)) {
      msg_tail <- glue(
        "a {get_stat_type_desc(explanatory_type)} explanatory variable ",
        "({explanatory_name(x)}).",
        .null = "NULL"
      )
    } else {
      msg_tail <- "no explanatory variable."
    }

    cli_abort(
      "{get_stat_desc(stat)} is not well-defined for a \\
       {get_stat_type_desc(response_type)} response variable \\
       ({response_name(x)}) and {msg_tail}",
      call = call
    )
  }

  if (is_hypothesized(x)) {
    stat_nulls <- stat_hypotheses |>
      dplyr::filter(
        stat == !!stat &
          hypothesis == attr(x, "null")
      )

    if (nrow(stat_nulls) == 0) {
      cli_abort(
        'The supplied statistic `stat = "{stat}"` is incompatible with the \\
         supplied hypothesis `null = "{attr(x, "null")}"`.',
        call = call
      )
    }
  }

  x
}

# When given no hypothesis for a theorized statistic, supply a reasonable value
.subset_1 <- function(x) {x[[1]]}
assume_null <- function(x, stat_) {
  null_fn <- theorized_nulls |>
    dplyr::filter(stat == stat_) |>
    dplyr::pull(null_fn) |>
    .subset_1()

  null_fn(x)
}


# User supplied "too much" information - hypothesized a value for a point
# estimate that isn't relevant to the statistic calculation
#
# The `stat = "mean"` default ensures that `stat %in% untheorized_stats`
# when called in non-`calculate` functions
message_on_excessive_null <- function(x, stat = "mean", fn) {
  if (!is_generated(x) && is_hypothesized(x) && stat %in% untheorized_stats) {
    null_type <- attr(x, "null")
    null_param <- attr(x, "params")

    cli_inform(
      "Message: The {null_type} null hypothesis \\
       {if (null_type == 'point') {paste0('`', names(null_param), ' = ', unname(null_param), '` ')} else {''}} \\
       does not inform calculation of the observed \\
       {if (fn == 'calculate') {paste0('statistic (', tolower(get_stat_desc(stat)), ') ')} else {'fit '}} \\
       and will be ignored."
    )
  }

  x
}

# User didn't supply "enough" information - no hypothesis for a theorized
# statistic on a point estimate, so warn that a reasonable value was assumed.
warn_on_insufficient_null <- function(x, stat, ...) {
  if (
    !is_hypothesized(x) &&
      !has_explanatory(x) &&
      !stat %in% c(untheorized_stats, "function") &&
      !(stat == "t" && "mu" %in% names(list(...)))
  ) {
    attr(x, "null") <- "point"
    attr(x, "params") <- assume_null(x, stat)

    cli_warn(c(
      "{get_stat_desc(stat)} requires a null \\
       hypothesis to calculate the observed statistic.",
      "Output assumes the following null value{print_params(x)}."
    ))
  }

  x
}

calc_impl <- function(type, x, order, ...) {
  UseMethod("calc_impl", type)
}

calc_impl_one_f <- function(f) {
  function(type, x, order, ...) {
    col <- base::setdiff(names(x), "replicate")

    if (!identical(dplyr::group_vars(x), "replicate")) {
      x <- dplyr::group_by(x, replicate)
    }

    res <- x |>
      dplyr::summarize(stat = f(!!(sym(col)), ...))

    # calculate SE for confidence intervals
    if (!is_generated(x)) {
      sample_sd <- x |>
        dplyr::summarize(stats::sd(!!(sym(col)))) |>
        dplyr::pull()

      attr(res, "se") <- sample_sd / sqrt(nrow(x))
    }

    res
  }
}

#' @export
calc_impl.mean <- calc_impl_one_f(mean)

#' @export
calc_impl.median <- calc_impl_one_f(stats::median)

#' @export
calc_impl.sum <- calc_impl_one_f(sum)

#' @export
calc_impl.sd <- calc_impl_one_f(stats::sd)

calc_impl_success_f <- function(f, output_name) {
  function(type, x, order, ...) {
    col <- base::setdiff(names(x), "replicate")

    success <- attr(x, "success")

    if (!identical(dplyr::group_vars(x), "replicate")) {
      x <- dplyr::group_by(x, replicate)
    }

    res <- x |>
      dplyr::summarize(stat = f(!!sym(col), success))

    # calculate SE for confidence intervals
    if (!is_generated(x) && output_name == "proportion") {
      prop <- res[["stat"]]

      attr(res, "se") <- sqrt((prop * (1 - prop)) / nrow(x))
    }

    res
  }
}

#' @export
calc_impl.prop <- calc_impl_success_f(
  f = function(response, success, ...) {
    mean(response == success, ...)
  },
  output_name = "proportion"
)

#' @export
calc_impl.count <- calc_impl_success_f(
  f = function(response, success, ...) {
    sum(response == success, ...)
  },
  output_name = "count"
)

#' @export
calc_impl.F <- function(type, x, order, ...) {
  x |>
    dplyr::summarize(
      stat = stats::anova(
        stats::lm(!!(response_expr(x)) ~ !!(explanatory_expr(x)))
      )$`F value`[1]
    )
}

#' @export
calc_impl.slope <- function(type, x, order, ...) {
  x |>
    dplyr::summarize(
      stat = stats::coef(
        stats::lm(!!(response_expr(x)) ~ !!(explanatory_expr(x)))
      )[2]
    )
}

#' @export
calc_impl.correlation <- function(type, x, order, ...) {
  x |>
    dplyr::summarize(
      stat = stats::cor(!!explanatory_expr(x), !!response_expr(x))
    )
}

calc_impl_diff_f <- function(f, operator) {
  function(type, x, order, ...) {
    res <- x |>
      dplyr::group_by(replicate, !!explanatory_expr(x), .drop = FALSE) |>
      dplyr::summarize(value = f(!!response_expr(x), ...)) |>
      dplyr::group_by(replicate) |>
      dplyr::summarize(
        stat = operator(
          value[!!(explanatory_expr(x)) == order[1]],
          value[!!(explanatory_expr(x)) == order[2]]
        )
      )

    # calculate SE for confidence intervals
    if (!is_generated(x) && identical(operator, `-`)) {
      sample_sds <- x |>
        dplyr::group_by(replicate, !!explanatory_expr(x), .drop = FALSE) |>
        dplyr::summarize(stats::sd(!!response_expr(x))) |>
        dplyr::pull()

      sample_counts <- x |>
        dplyr::count(!!explanatory_expr(x), .drop = FALSE) |>
        dplyr::pull()

      attr(res, "se") <-
        sqrt(
          sum(
            (sample_sds[1] / sqrt(sample_counts[1]))^2,
            (sample_sds[2] / sqrt(sample_counts[2]))^2
          )
        )
    }

    res
  }
}

#' @export
calc_impl.diff_in_means <- calc_impl_diff_f(mean, operator = `-`)

#' @export
calc_impl.diff_in_medians <- calc_impl_diff_f(stats::median, operator = `-`)

#' @export
calc_impl.ratio_of_means <- calc_impl_diff_f(mean, operator = `/`)

#' @export
calc_impl.Chisq <- function(type, x, order, ...) {
  resp_var <- response_name(x)

  if (!has_attr(x, "explanatory")) {
    # Chi-Square Goodness of Fit
    p_levels <- get_par_levels(x)
    chisq_gof <- function(df) {
      chisq <- suppressWarnings(stats::chisq.test(
        # Ensure correct ordering of parameters
        table(df[[resp_var]])[p_levels],
        p = attr(x, "params"),
        ...
      ))

      unname(chisq[["statistic"]])
    }

    result <- x |>
      dplyr::nest_by(.key = "data") |>
      dplyr::summarise(stat = chisq_gof(data), .groups = "drop")
  } else {
    # Chi-Square Test of Independence
    expl_var <- explanatory_name(x)
    chisq_indep <- function(df) {
      res <- suppressWarnings(stats::chisq.test(
        x = df[[expl_var]],
        y = df[[resp_var]],
        ...
      ))

      res[["statistic"]]
    }

    # Compute result
    result <- x |>
      dplyr::nest_by(.key = "data") |>
      dplyr::summarise(stat = chisq_indep(data), .groups = "drop")
  }

  if (is_generated(x)) {
    result <- result |> dplyr::select(replicate, stat)
  } else {
    result <- result |> dplyr::select(stat)
  }

  copy_attrs(
    to = result,
    from = x,
    attrs = c(
      "response",
      "success",
      "explanatory",
      "response_type",
      "explanatory_type",
      "distr_param",
      "distr_param2",
      "theory_type",
      "type_desc_response",
      "type_desc_explanatory"
    )
  )
}

#' @export
calc_impl.function_of_props <- function(type, x, order, operator, ...) {
  col <- response_expr(x)
  success <- attr(x, "success")

  res <- x |>
    dplyr::group_by(replicate, !!explanatory_expr(x), .drop = FALSE) |>
    dplyr::summarize(prop = mean(!!sym(col) == success, ...)) |>
    dplyr::summarize(
      stat = operator(
        prop[!!explanatory_expr(x) == order[1]],
        prop[!!explanatory_expr(x) == order[2]]
      )
    )

  # calculate SE for confidence intervals
  if (!is_generated(x)) {
    props <- x |>
      dplyr::group_by(!!explanatory_expr(x), .drop = FALSE) |>
      dplyr::summarize(prop = mean(!!sym(col) == success, ...)) |>
      dplyr::pull()

    counts <- x |>
      dplyr::count(!!explanatory_expr(x), .drop = FALSE) |>
      dplyr::pull()

    attr(res, "se") <-
      sqrt(
        sum(
          abs((props[1] * (1 - props[1])) / counts[1]),
          abs((props[2] * (1 - props[2])) / counts[2])
        )
      )
  }

  res
}

#' @export
calc_impl.diff_in_props <- function(type, x, order, ...) {
  calc_impl.function_of_props(type, x, order, operator = `-`, ...)
}

#' @export
calc_impl.ratio_of_props <- function(type, x, order, ...) {
  calc_impl.function_of_props(type, x, order, operator = `/`, ...)
}

#' @export
calc_impl.odds_ratio <- function(type, x, order, ...) {
  col <- response_expr(x)
  success <- attr(x, "success")

  x |>
    dplyr::group_by(replicate, !!explanatory_expr(x), .drop = FALSE) |>
    dplyr::summarize(prop = mean(!!sym(col) == success, ...)) |>
    dplyr::summarize(
      prop_1 = prop[!!explanatory_expr(x) == order[1]],
      prop_2 = prop[!!explanatory_expr(x) == order[2]],
      stat = (prop_1 / prop_2) / ((1 - prop_1) / (1 - prop_2))
    ) |>
    dplyr::select(stat)
}

#' @export
calc_impl.t <- function(type, x, order, ...) {
  if (theory_type(x) == "Two sample t") {
    x <- reorder_explanatory(x, order)

    df_out <- x |>
      dplyr::summarize(
        stat = stats::t.test(
          !!response_expr(x) ~ !!explanatory_expr(x),
          ...
        )[["statistic"]]
      )
  } else if (theory_type(x) == "One sample t") {
    if (!is_hypothesized(x)) {
      # For bootstrap
      df_out <- x |>
        dplyr::summarize(
          stat = stats::t.test(!!response_expr(x), ...)[["statistic"]]
        )
    } else {
      # For hypothesis testing
      df_out <- x |>
        dplyr::summarize(
          stat = stats::t.test(
            !!response_expr(x),
            mu = attr(!!x, "params"),
            ...
          )[["statistic"]]
        )
    }
  }
  df_out
}

#' @export
calc_impl.z <- function(type, x, order, ...) {
  # Two sample proportions
  if (theory_type(x) == "Two sample props z") {
    col <- response_expr(x)
    success <- attr(x, "success")

    x$explan <- factor(
      explanatory_variable(x),
      levels = c(order[1], order[2])
    )

    aggregated <- x |>
      dplyr::group_by(replicate, explan) |>
      dplyr::summarize(
        group_num = dplyr::n(),
        prop = mean(rlang::eval_tidy(col) == rlang::eval_tidy(success)),
        num_suc = sum(rlang::eval_tidy(col) == rlang::eval_tidy(success))
      )

    df_out <- aggregated |>
      dplyr::summarize(
        diff_prop = prop[explan == order[1]] - prop[explan == order[2]],
        total_suc = sum(num_suc),
        n1 = group_num[1],
        n2 = group_num[2],
        p_hat = total_suc / (n1 + n2),
        denom = sqrt(p_hat * (1 - p_hat) / n1 + p_hat * (1 - p_hat) / n2),
        stat = diff_prop / denom
      ) |>
      dplyr::select(stat)

    df_out
  } else if (theory_type(x) == "One sample prop z") {
    # One sample proportion

    # When `hypothesize()` has been called
    success <- attr(x, "success")
    col <- response_expr(x)
    p0 <- unname(attr(x, "params")[1])
    num_rows <- nrow(x) / length(unique(x$replicate))

    df_out <- x |>
      dplyr::summarize(
        stat = (mean(rlang::eval_tidy(col) == rlang::eval_tidy(success), ...) -
          p0) /
          sqrt((p0 * (1 - p0)) / num_rows)
      )

    df_out
  }
}

#' @export
calc_impl.function <- function(type, x, order, ..., call = rlang::caller_env()) {
  rlang::try_fetch(
    {
      if (!identical(dplyr::group_vars(x), "replicate")) {
         x <- dplyr::group_by(x, replicate)
      }
      x_by_replicate <- dplyr::group_split(x)
      res <- purrr::map(x_by_replicate, ~type(.x, order, ...))
    },
    error = function(cnd) {rethrow_stat_cnd(cnd, call = call)},
    warning = function(cnd) {rethrow_stat_cnd(cnd, call = call)}
  )

  if (!rlang::is_scalar_atomic(res[[1]])) {
    cli::cli_abort(
      c(
        "The supplied {.arg stat} function must return a scalar value.",
        "i" = "It returned {.obj_type_friendly {res[[1]]}}."
      ),
      call = call
    )
  }

  tibble::new_tibble(list(stat = unlist(res)))
}

rethrow_stat_cnd <- function(cnd, call = call) {
  cli::cli_abort(
    "The supplied {.arg stat} function encountered an issue.",
    parent = cnd,
    call = call
  )
}


================================================
FILE: R/deprecated.R
================================================
#' Deprecated functions and objects
#'
#' These functions and objects should no longer be used. They will be removed
#' in a future release of infer.
#' @param x See the non-deprecated function.
#' @param level See the non-deprecated function.
#' @param type See the non-deprecated function.
#' @param point_estimate See the non-deprecated function.
#' @param obs_stat See the non-deprecated function.
#' @param direction See the non-deprecated function.
#' @seealso [get_p_value()], [get_confidence_interval()], [generate()]
#' @name deprecated
NULL


#' @rdname deprecated
#' @export
conf_int <- function(
  x,
  level = 0.95,
  type = "percentile",
  point_estimate = NULL
) {
  lifecycle::deprecate_stop("0.4.0", "conf_int()", "get_confidence_interval()")
}


#' @rdname deprecated
#' @export
p_value <- function(x, obs_stat, direction) {
  lifecycle::deprecate_stop("0.4.0", "conf_int()", "get_p_value()")
}


================================================
FILE: R/fit.R
================================================
#' @importFrom generics fit
#' @details
#' Read more about infer's [fit][fit.infer()] function [here][fit.infer()] or
#' by running `?fit.infer` in your console.
#'
#' @export
generics::fit


#' Fit linear models to infer objects
#'
#' @description
#' Given the output of an infer core function, this function will fit
#' a linear model using [stats::glm()] according to the formula and data supplied
#' earlier in the pipeline. If passed the output of [specify()] or
#' [hypothesize()], the function will fit one model. If passed the output
#' of [generate()], it will fit a model to each data resample, denoted in
#' the `replicate` column. The family of the fitted model depends on the type
#' of the response variable. If the response is numeric, `fit()` will use
#' `family = "gaussian"` (linear regression). If the response is a 2-level
#' factor or character, `fit()` will use `family = "binomial"` (logistic
#' regression). To fit character or factor response variables with more than
#' two levels, we recommend [parsnip::multinom_reg()].
#'
#' infer provides a fit "method" for infer objects, which is a way of carrying
#' out model fitting as applied to infer output. The "generic," imported from
#' the generics package and re-exported from this package, provides the
#' general form of `fit()` that points to infer's method when called on an
#' infer object. That generic is also documented here.
#'
#' Learn more in `vignette("infer")`.
#'
#' @param object Output from an infer function---likely [generate()] or
#' [specify()]---which specifies the formula and data to fit a model to.
#' @param ... Any optional arguments to pass along to the model fitting
#' function. See [stats::glm()] for more information.
#'
#' @return A [tibble][tibble::tibble] containing the following columns:
#'
#' \itemize{
#'   \item `replicate`: Only supplied if the input object had been previously
#'     passed to [generate()]. A number corresponding to which resample of the
#'     original data set the model was fitted to.
#'   \item `term`: The explanatory variable (or intercept) in question.
#'   \item `estimate`: The model coefficient for the given resample (`replicate`) and
#'     explanatory variable (`term`).
#' }
#'
#' @details
#'
#' Randomization-based statistical inference with multiple explanatory
#' variables requires careful consideration of the null hypothesis in question
#' and its implications for permutation procedures. Inference for partial
#' regression coefficients via the permutation method implemented in
#' [generate()] for multiple explanatory variables, consistent with its meaning
#' elsewhere in the package, is subject to additional distributional assumptions
#' beyond those required for one explanatory variable. Namely, the distribution
#' of the response variable must be similar to the distribution of the errors
#' under the null hypothesis' specification of a fixed effect of the explanatory
#' variables. (This null hypothesis is reflected in the `variables` argument to
#' [generate()]. By default, all of the explanatory variables are treated
#' as fixed.) A general rule of thumb here is, if there are large outliers
#' in the distributions of any of the explanatory variables, this distributional
#' assumption will not be satisfied; when the response variable is permuted,
#' the (presumably outlying) value of the response will no longer be paired
#' with the outlier in the explanatory variable, causing an outsize effect
#' on the resulting slope coefficient for that explanatory variable.
#'
#' More sophisticated methods that are outside of the scope of this package
#' requiring fewer---or less strict---distributional assumptions
#' exist. For an overview, see "Permutation tests for univariate or
#' multivariate analysis of variance and regression" (Marti J. Anderson,
#' 2001), \doi{10.1139/cjfas-58-3-626}.
#'
#' @includeRmd man-roxygen/seeds.Rmd
#'
#' @examples
#' # fit a linear model predicting number of hours worked per
#' # week using respondent age and degree status.
#' observed_fit <- gss |>
#'   specify(hours ~ age + college) |>
#'   fit()
#'
#' observed_fit
#'
#' # fit 100 models to resamples of the gss dataset, where the response
#' # `hours` is permuted in each. note that this code is the same as
#' # the above except for the addition of the `generate` step.
#' null_fits <- gss |>
#'   specify(hours ~ age + college) |>
#'   hypothesize(null = "independence") |>
#'   generate(reps = 100, type = "permute") |>
#'   fit()
#'
#' null_fits
#'
#' # for logistic regression, just supply a binary response variable!
#' # (this can also be made explicit via the `family` argument in ...)
#' gss |>
#'   specify(college ~ age + hours) |>
#'   fit()
#'
#' # more in-depth explanation of how to use the infer package
#' \dontrun{
#' vignette("infer")
#' }
#'
#' @rdname fit.infer
#' @method fit infer
#' @export fit.infer
#' @export
fit.infer <- function(object, ...) {
  message_on_excessive_null(object, fn = "fit")

  # Confirm that the family, possibly supplied via
  # `family` in ..., takes precedence over the default.
  # Return a processed version of the ellipses
  dots <- check_family(object, ...)

  # Relevel the response based on the success attribute
  # so that the reference level is reflected in the fit
  object <- relevel_response(object)

  # Extract the formula if it was supplied to specify, otherwise
  # construct it out of the explanatory and response arguments
  formula <- get_formula(object)

  if (is_generated(object)) {
    x <- object |>
      tidyr::nest(data = -replicate) |>
      dplyr::rowwise() |>
      dplyr::mutate(
        model = list(
          do.call(
            fit_linear_model,
            c(
              list(object = data, formula = formula),
              dots
            )
          )
        )
      ) |>
      dplyr::select(replicate, model) |>
      tidyr::unnest(model)
  } else {
    x <- do.call(
      fit_linear_model,
      c(
        list(object, formula),
        dots
      )
    )
  }

  x <- copy_attrs(x, object)
  attr(x, "fitted") <- TRUE

  x
}

check_family <- function(object, ..., call = caller_env()) {
  response_type <- attr(object, "type_desc_response")

  if (response_type == "mult") {
    cli_abort(
      c(
        "infer does not support fitting models for categorical response variables \\
       with more than two levels.",
        i = "Please see {.fun multinom_reg} from the parsnip package."
      ),
      call = call
    )
  }

  dots <- list(...)

  if ("family" %in% names(dots)) {
    return(dots)
  }

  if (response_type == "bin") {
    dots[["family"]] <- stats::binomial
  } else {
    dots[["family"]] <- stats::gaussian
  }

  dots
}

relevel_response <- function(x) {
  if (!is.null(attr(x, "success"))) {
    x[[response_name(x)]] <-
      stats::relevel(
        response_variable(x),
        ref = attr(x, "success")
      )
  }

  x
}

get_formula <- function(x) {
  if (has_attr(x, "formula")) {
    return(attr(x, "formula"))
  } else {
    exp <- paste0(explanatory_name(x), collapse = " + ")

    as.formula(
      glue(
        '{response_name(x)} ~
         {if (exp == "") NULL else exp}',
        .null = "NULL"
      )
    )
  }
}

fit_linear_model <- function(object, formula, ...) {
  stats::glm(
    formula = formula,
    data = object,
    ...
  ) |>
    broom::tidy() |>
    dplyr::select(
      term,
      estimate
    ) |>
    dplyr::mutate(
      term = dplyr::case_when(
        term == "(Intercept)" ~ "intercept",
        TRUE ~ term
      )
    )
}


================================================
FILE: R/generate.R
================================================
#' Generate resamples, permutations, or simulations
#'
#' @description
#'
#' Generation creates a simulated distribution from `specify()`.
#' In the context of confidence intervals, this is a bootstrap distribution
#' based on the result of `specify()`. In the context of hypothesis testing,
#' this is a null distribution based on the result of `specify()` and
#' `hypothesize().`
#'
#' Learn more in `vignette("infer")`.
#'
#' @param x A data frame that can be coerced into a [tibble][tibble::tibble].
#' @param reps The number of resamples to generate.
#' @param type The method used to generate resamples of the observed
#'   data reflecting the null hypothesis. Currently one of
#'   `"bootstrap"`, `"permute"`, or `"draw"` (see below).
#' @param variables If `type = "permute"`, a set of unquoted column names in the
#'   data to permute (independently of each other). Defaults to only the
#'   response variable. Note that any derived effects that depend on these
#'   columns (e.g., interaction effects) will also be affected.
#' @param ... Currently ignored.
#'
#' @return A tibble containing `reps` generated datasets, indicated by the
#'   `replicate` column.
#'
#' @section Generation Types:
#'
#' The `type` argument determines the method used to create the null
#' distribution.
#'
#' \itemize{
#'   \item `bootstrap`: A bootstrap sample will be drawn for each replicate,
#'   where a sample of size equal to the input sample size is drawn (with
#'   replacement) from the input sample data.
#'   \item `permute`: For each replicate, each input value will be randomly
#'   reassigned (without replacement) to a new output value in the sample.
#'   \item `draw`: A value will be sampled from a theoretical distribution
#'   with parameter `p` specified in [hypothesize()] for each replicate. This
#'   option is currently only applicable for testing on one proportion. This
#'   generation type was previously called `"simulate"`, which has been
#'   superseded.
#' }
#'
#' @includeRmd man-roxygen/seeds.Rmd
#'
#' @examples
#' # generate a null distribution by taking 200 bootstrap samples
#' gss |>
#'  specify(response = hours) |>
#'  hypothesize(null = "point", mu = 40) |>
#'  generate(reps = 200, type = "bootstrap")
#'
#' # generate a null distribution for the independence of
#' # two variables by permuting their values 200 times
#' gss |>
#'  specify(partyid ~ age) |>
#'  hypothesize(null = "independence") |>
#'  generate(reps = 200, type = "permute")
#'
#' # generate a null distribution via sampling from a
#' # binomial distribution 200 times
#' gss |>
#' specify(response = sex, success = "female") |>
#'   hypothesize(null = "point", p = .5) |>
#'   generate(reps = 200, type = "draw") |>
#'   calculate(stat = "z")
#'
#' # more in-depth explanation of how to use the infer package
#' \dontrun{
#' vignette("infer")
#' }
#'
#' @importFrom dplyr group_by
#' @family core functions
#' @export
generate <- function(
  x,
  reps = 1,
  type = NULL,
  variables = !!response_expr(x),
  ...
) {
  # Check type argument, warning if necessary
  type <- sanitize_generation_type(type)
  auto_type <- sanitize_generation_type(attr(x, "type"))
  type <- if (!is.null(type)) {
    compare_type_vs_auto_type(type, auto_type, x)
  } else {
    use_auto_type(auto_type)
  }
  attr(x, "type") <- type

  check_cols(x, rlang::enquo(variables), type, missing(variables))

  attr(x, "generated") <- TRUE

  switch(
    type,
    bootstrap = bootstrap(x, reps, ...),
    permute = {
      check_permutation_attributes(x)
      permute(x, reps, rlang::enquo(variables), ...)
    },
    draw = draw(x, reps, ...),
    simulate = draw(x, reps, ...)
  )
}

# Check that type argument is an implemented type
sanitize_generation_type <- function(x, call = caller_env()) {
  if (is.null(x)) return(x)

  check_type(x, is.character, call = call)

  if (!x %in% c("bootstrap", "permute", "simulate", "draw")) {
    cli_abort(
      'The `type` argument should be one of "bootstrap", "permute", \\
       or "draw". See {.help [{.fun generate}](infer::generate)} for more details.',
      call = call
    )
  }

  if (x == "simulate") {
    cli_inform(
      'The `"simulate"` generation type has been renamed to `"draw"`. \\
       Use `type = "draw"` instead to quiet this message.'
    )
  }

  x
}

# Ensure that the supplied type matches what would be assumed from input
compare_type_vs_auto_type <- function(type, auto_type, x) {
  if (is.null(auto_type)) {
    return(type)
  }

  if (
    (type == "bootstrap" && has_p_param(x)) ||
      (type != "bootstrap" &&
        auto_type != type &&
        # make sure auto_type vs type difference isn't just an alias
        (any(!c(auto_type, type) %in% c("draw", "simulate"))))
  ) {
    cli_warn(
      "You have given `type = \"{type}\"`, but `type` is expected \\
         to be `\"{auto_type}\"`. This workflow is untested and \\
         the results may not mean what you think they mean."
    )
  }

  type
}

has_p_param <- function(x) {
  if (!has_attr(x, "params")) {
    return(FALSE)
  }

  if (all(grepl("^p\\.", names(attr(x, "params"))))) {
    return(TRUE)
  }

  FALSE
}

use_auto_type <- function(auto_type) {
  if (!suppress_infer_messages()) {
    cli_inform('Setting `type = "{auto_type}"` in `generate()`.')
  }
  auto_type
}

check_permutation_attributes <- function(x, call = caller_env()) {
  if (
    any(!has_attr(x, "response"), !has_attr(x, "explanatory")) &&
      !identical(attr(x, "null"), "paired independence")
  ) {
    cli_abort(
      "Please {.fun specify} an explanatory and a response variable \\
        when permuting.",
      call = call
    )
  }
}

check_cols <- function(
  x,
  variables,
  type,
  missing,
  arg_name = "variables",
  call = caller_env()
) {
  if (!rlang::is_symbolic(rlang::get_expr(variables)) && type == "permute") {
    cli_abort(
      "The {.arg {arg_name}} argument should be one or more unquoted variable names \\
        (not strings in quotation marks).",
      call = call
    )
  }

  if (!missing && type != "permute") {
    cli_warn(
      'The {.arg {arg_name}} argument is only relevant for the "permute" \\
       generation type and will be ignored.'
    )

    should_prompt <- FALSE
  } else {
    should_prompt <- TRUE
  }

  col_names <- process_variables(variables, should_prompt)

  if (any(!col_names %in% colnames(x))) {
    bad_cols <- col_names[!col_names %in% colnames(x)]

    cli_abort(
      '{qty(bad_cols)}The column{?s} {.field {bad_cols}} provided to \\
       the {.arg {arg_name}} argument{qty(bad_cols)} {?is/are} not in the supplied data.',
      call = call
    )
  }
}

bootstrap <- function(x, reps = 1, ...) {
  # Check if hypothesis test chosen
  if (is_hypothesized(x)) {
    # If so, shift the variable chosen to have a mean corresponding
    # to that specified in `hypothesize`
    if (!is.null(attr(attr(x, "params"), "names"))) {
      if (identical(attr(attr(x, "params"), "names"), "mu")) {
        col <- response_name(x)
        x[[col]] <- x[[col]] - mean(x[[col]], na.rm = TRUE) + attr(x, "params")
      } # Similarly for median
      else if (identical(attr(attr(x, "params"), "names"), "med")) {
        col <- response_name(x)
        x[[col]] <- x[[col]] -
          stats::median(x[[col]], na.rm = TRUE) +
          attr(x, "params")
      }
    }
  }

  # Set variables for use in calculate()
  result <- rep_sample_n(x, size = nrow(x), replace = TRUE, reps = reps)
  result <- copy_attrs(to = result, from = x)

  append_infer_class(result)
}

#' @importFrom dplyr bind_rows group_by
permute <- function(x, reps = 1, variables, ..., call = caller_env()) {
  nrow_x <- nrow(x)
  df_out <- replicate(
    reps,
    permute_once(x, variables, call = call),
    simplify = FALSE
  ) |>
    dplyr::bind_rows() |>
    dplyr::mutate(replicate = rep(1:reps, each = !!nrow_x)) |>
    group_by_replicate(reps, nrow_x)

  df_out <- copy_attrs(to = df_out, from = x)

  append_infer_class(df_out)
}

permute_once <- function(x, variables, ..., call = caller_env()) {
  dots <- list(...)
  null <- attr(x, "null")

  if (
    !is_hypothesized(x) ||
      !null %in% c("independence", "paired independence")
  ) {
    cli_abort(
      "Permuting should be done only when doing an independence \\
        hypothesis test. See {.help [{.fun hypothesize}](infer::hypothesize)}.",
      call = call
    )
  }

  variables <- process_variables(variables, FALSE)
  if (null == "independence") {
    # for each column, determine whether it should be permuted
    needs_permuting <- colnames(x) %in% variables

    # pass each to permute_column with its associated logical
    out <- purrr::map2(x, needs_permuting, permute_column)
    out <- tibble::new_tibble(out)
  } else {
    out <- x
    signs <- sample(c(-1, 1), nrow(x), replace = TRUE, prob = c(.5, .5))
    out[[variables]] <- x[[variables]] * signs
  }

  copy_attrs(out, x)

  return(out)
}

process_variables <- function(variables, should_prompt) {
  # extract the expression and convert each element to string
  out <- rlang::get_expr(variables)

  if (length(out) == 1) {
    out <- as.character(out)
  } else {
    out <- as.list(out)
    out <- purrr::map(out, as.character)
  }

  # drop c()
  out[out == "c"] <- NULL

  # drop interactions and message
  interactions <- purrr::map_lgl(out, `%in%`, x = "*")

  if (any(interactions) && should_prompt) {
    cli_inform(
      "Message: Please supply only data columns to the {.arg variables} argument. \\
       Note that any derived effects that depend on these columns will also \\
       be affected."
    )
  }

  out <- out[!interactions]

  out
}

permute_column <- function(col, permute) {
  if (permute) {
    sample(col, size = length(col), replace = FALSE)
  } else {
    col
  }
}

#' @importFrom dplyr pull
#' @importFrom tibble tibble
#' @importFrom rlang :=
draw <- function(x, reps = 1, ...) {
  fct_levels <- as.character(unique(response_variable(x)))

  probs <- format_params(x)
  col_simmed <- unlist(replicate(
    reps,
    sample(fct_levels, size = nrow(x), replace = TRUE, prob = probs),
    simplify = FALSE
  ))

  x_nrow <- nrow(x)
  rep_tbl <- tibble::tibble(
    !!response_expr(x) := as.factor(col_simmed),
    replicate = as.factor(rep(1:reps, rep(x_nrow, reps)))
  )

  rep_tbl <- copy_attrs(to = rep_tbl, from = x)

  rep_tbl <- group_by_replicate(rep_tbl, reps, nrow(x))

  append_infer_class(rep_tbl)
}


================================================
FILE: R/get_confidence_interval.R
================================================
#' Compute confidence interval
#'
#' @description
#'
#' Compute a confidence interval around a summary statistic. Both
#' simulation-based and theoretical methods are supported, though only
#' `type = "se"` is supported for theoretical methods.
#'
#' Learn more in `vignette("infer")`.
#'
#' @param x A distribution. For simulation-based inference, a data frame
#'   containing a distribution of [calculate()]d statistics
#'   or [`fit()`][fit.infer()]ted coefficient estimates. This object should
#'   have been passed to [generate()] before being supplied or
#'   [calculate()] to [`fit()`][fit.infer()]. For theory-based inference,
#'   output of [assume()]. Distributions for confidence intervals do not
#'   require a null hypothesis via [hypothesize()].
#' @param level A numerical value between 0 and 1 giving the confidence level.
#'   Default value is 0.95.
#' @param type A string giving which method should be used for creating the
#'   confidence interval. The default is `"percentile"` with `"se"`
#'   corresponding to (multiplier * standard error) and `"bias-corrected"` for
#'   bias-corrected interval as other options.
#' @param point_estimate A data frame containing the observed statistic (in a
#'   [calculate()]-based workflow) or observed fit (in a
#'   [`fit()`][fit.infer()]-based workflow). This object is likely the output
#'   of [calculate()] or [`fit()`][fit.infer()] and need not
#'   to have been passed to [generate()]. Set to `NULL` by
#'   default. Must be provided if `type` is `"se"` or `"bias-corrected"`.
#'
#' @return A [tibble][tibble::tibble] containing the following columns:
#'
#' \itemize{
#'   \item `term`: The explanatory variable (or intercept) in question. Only
#'     supplied if the input had been previously passed to [`fit()`][fit.infer()].
#'   \item `lower_ci`, `upper_ci`: The lower and upper bounds of the confidence
#'     interval, respectively.
#' }
#'
#' @details
#' A null hypothesis is not required to compute a confidence interval. However,
#' including [hypothesize()] in a pipeline leading to `get_confidence_interval()`
#' will not break anything. This can be useful when computing a confidence
#' interval using the same distribution used to compute a p-value.
#'
#' Theoretical confidence intervals (i.e. calculated by supplying the output
#' of [assume()] to the `x` argument) require that the point estimate lies on
#' the scale of the data. The distribution defined in [assume()] will be
#' recentered and rescaled to align with the point estimate, as can be shown
#' in the output of [visualize()] when paired with [shade_confidence_interval()].
#' Confidence intervals are implemented for the following distributions and
#' point estimates:
#'
#' \itemize{
#'   \item `distribution = "t"`: `point_estimate` should be the output of
#'   [calculate()] with `stat = "mean"` or `stat = "diff in means"`
#'   \item `distribution = "z"`: `point_estimate` should be the output of
#'   [calculate()] with `stat = "prop"` or `stat = "diff in props"`
#' }
#'
#' @section Aliases:
#' `get_ci()` is an alias of `get_confidence_interval()`.
#' `conf_int()` is a deprecated alias of `get_confidence_interval()`.
#'
#' @examples
#'
#' boot_dist <- gss |>
#'   # We're interested in the number of hours worked per week
#'   specify(response = hours) |>
#'   # Generate bootstrap samples
#'   generate(reps = 1000, type = "bootstrap") |>
#'   # Calculate mean of each bootstrap sample
#'   calculate(stat = "mean")
#'
#' boot_dist |>
#'   # Calculate the confidence interval around the point estimate
#'   get_confidence_interval(
#'     # At the 95% confidence level; percentile method
#'     level = 0.95
#'   )
#'
#' # for type = "se" or type = "bias-corrected" we need a point estimate
#' sample_mean <- gss |>
#'   specify(response = hours) |>
#'   calculate(stat = "mean")
#'
#' boot_dist |>
#'   get_confidence_interval(
#'     point_estimate = sample_mean,
#'     # At the 95% confidence level
#'     level = 0.95,
#'     # Using the standard error method
#'     type = "se"
#'   )
#'
#' # using a theoretical distribution -----------------------------------
#'
#' # define a sampling distribution
#' sampling_dist <- gss |>
#'   specify(response = hours) |>
#'   assume("t")
#'
#' # get the confidence interval---note that the
#' # point estimate is required here
#' get_confidence_interval(
#'   sampling_dist,
#'   level = .95,
#'   point_estimate = sample_mean
#' )
#'
#' # using a model fitting workflow -----------------------
#'
#' # fit a linear model predicting number of hours worked per
#' # week using respondent age and degree status.
#' observed_fit <- gss |>
#'   specify(hours ~ age + college) |>
#'   fit()
#'
#' observed_fit
#'
#' # fit 100 models to resamples of the gss dataset, where the response
#' # `hours` is permuted in each. note that this code is the same as
#' # the above except for the addition of the `generate` step.
#' null_fits <- gss |>
#'   specify(hours ~ age + college) |>
#'   hypothesize(null = "independence") |>
#'   generate(reps = 100, type = "permute") |>
#'   fit()
#'
#' null_fits
#'
#' get_confidence_interval(
#'   null_fits,
#'   point_estimate = observed_fit,
#'   level = .95
#' )
#'
#' # more in-depth explanation of how to use the infer package
#' \dontrun{
#' vignette("infer")
#' }
#'
#' @name get_confidence_interval
#' @family auxillary functions
#' @export
get_confidence_interval <- function(
  x,
  level = 0.95,
  type = NULL,
  point_estimate = NULL
) {
  # Inform if no `level` was explicitly supplied
  if (!("level" %in% rlang::call_args_names(match.call())) && !suppress_infer_messages()) {
    cli_inform("Using `level = {level}` to compute confidence interval.")
  }

  if (is.null(type)) {
    if (inherits(x, "infer_dist")) {
      type <- "se"
    } else {
      type <- "percentile"
    }
  }

  if (is_fitted(x)) {
    # check that x and point estimate reference the same variables
    check_mlr_x_and_obs_stat(
      x,
      point_estimate,
      "get_confidence_interval",
      "point_estimate"
    )

    # split up x and point estimate by term
    term_data <- x |>
      dplyr::ungroup() |>
      dplyr::group_by(term) |>
      dplyr::group_split() |>
      purrr::map(copy_attrs, x)

    term_estimates <- point_estimate |>
      dplyr::ungroup() |>
      dplyr::group_by(term) |>
      dplyr::group_split()

    # check arguments for each term
    purrr::map2_dfr(
      term_data,
      purrr::map(term_estimates, purrr::pluck, "estimate"),
      check_ci_args,
      level = level,
      type = type
    )

    # map over switch_ci and then add the term column back in
    purrr::map2_dfr(
      term_data,
      purrr::map(term_estimates, purrr::pluck, "estimate"),
      switch_ci,
      level = level,
      type = type
    ) |>
      dplyr::mutate(
        term = purrr::map_chr(term_estimates, purrr::pluck, "term"),
        .before = dplyr::everything()
      ) |>
      copy_attrs(x)
  } else {
    check_ci_args(x, level, type, point_estimate)

    switch_ci(type, x, level, point_estimate)
  }
}

#' @rdname get_confidence_interval
#' @export
get_ci <- function(x, level = 0.95, type = NULL, point_estimate = NULL) {
  get_confidence_interval(
    x,
    level = level,
    type = type,
    point_estimate = point_estimate
  )
}

switch_ci <- function(type, x, level, point_estimate) {
  switch(
    type,
    percentile = ci_percentile(x, level),
    se = ci_se(x, level, point_estimate),
    `bias-corrected` = ci_bias_corrected(x, level, point_estimate)
  )
}

remove_missing_estimates <- function(estimates) {
  na_estimates <- is.na(estimates)
  na_estimates_n <- sum(na_estimates)

  if (na_estimates_n > 0) {
    cli_warn(
      "{na_estimates_n} estimates were missing and were removed when \\
               calculating the confidence interval."
    )
  }

  estimates[!na_estimates]
}

ci_percentile <- function(x, level) {
  # x[[ncol(x)]] pulls out the stat or estimate column
  estimates <- remove_missing_estimates(x[[ncol(x)]])

  ci_vec <- stats::quantile(estimates, probs = (1 + c(-level, level)) / 2)

  make_ci_df(ci_vec)
}

ci_se <- function(x, level, point_estimate) {
  point_estimate_ <- check_obs_stat(point_estimate)

  args <- list()

  if (inherits(x, "infer_dist")) {
    se <- attr(point_estimate, "se")
    qfn <- paste0("q", attr(x, "distribution"))
    if (attr(x, "distribution") == "t") {
      args <- list(df = attr(x, "df"))
    }
  } else {
    # x[[ncol(x)]] pulls out the stat or estimate column
    estimates <- remove_missing_estimates(x[[ncol(x)]])
    se <- stats::sd(estimates)

    qfn <- "qnorm"
  }

  args <- c(args, list(p = (1 + level) / 2))

  multiplier <- do.call(qfn, args)

  ci_vec <- point_estimate_ + c(-multiplier, multiplier) * se

  res <- make_ci_df(ci_vec)

  attr(res, "se") <- attr(point_estimate, "se")
  attr(res, "point_estimate") <- point_estimate_

  res
}

ci_bias_corrected <- function(x, level, point_estimate) {
  point_estimate <- check_obs_stat(point_estimate)

  # x[[ncol(x)]] pulls out the stat or estimate column
  estimates <- remove_missing_estimates(x[[ncol(x)]])

  p <- mean(estimates <= point_estimate)

  z0 <- stats::qnorm(p)
  # z_alpha_2 is z_(alpha/2)
  z_alpha_2 <- stats::qnorm((1 + c(-level, level)) / 2)
  new_probs <- stats::pnorm(2 * z0 + z_alpha_2)

  ci_vec <- stats::quantile(estimates, probs = new_probs)

  make_ci_df(ci_vec)
}

check_ci_args <- function(x, level, type, point_estimate, call = caller_env()) {
  if (!is.null(point_estimate)) {
    if (!is.data.frame(point_estimate)) {
      check_type(point_estimate, is.numeric, call = call)
    } else {
      check_type(point_estimate, is.data.frame, call = call)
      check_type(point_estimate[[1]][[1]], is.numeric, call = call)
    }
  }
  check_is_distribution(x, "get_confidence_interval")
  check_type(level, is.numeric, call = call)

  if ((level <= 0) || (level >= 1)) {
    cli_abort(
      "The value of {.arg level} must be between 0 and 1, non-inclusive.",
      call = call
    )
  }

  if (inherits(x, "infer_dist") && !is.null(type) && type != "se") {
    cli_abort(
      'The only {.arg type} option for theory-based confidence intervals \\
       is `type = "se"`.',
      call = call
    )
  }

  if (!(type %in% c("percentile", "se", "bias-corrected"))) {
    cli_abort(
      'The options for `type` are "percentile", "se", or "bias-corrected".',
      call = call
    )
  }

  if ((type %in% c("se", "bias-corrected")) && is.null(point_estimate)) {
    cli_abort(
      'A numeric value needs to be given for {.arg point_estimate} \\
       for `type` "se" or "bias-corrected".',
      call = call
    )
  }

  if (inherits(x, "infer_dist")) {
    # theoretical CIs require the full point estimate infer object as they
    # contain the necessary standard error
    if (!inherits(point_estimate, "infer")) {
      cli_abort(
        'For theoretical confidence intervals, the `point_estimate` argument \\
         must be an `infer` object. Have you made sure to supply the output of \\
         {.fun calculate} as the `point_estimate` argument?',
        call = call
      )
    }

    if (
      !attr(point_estimate, "stat") %in%
        c("mean", "prop", "diff in means", "diff in props")
    ) {
      cli_abort(
        'The only allowable statistics for theoretical confidence intervals \\
         are "mean", "prop", "diff in means", and "diff in props". See \\
         the "Details" section of \\
         {.help [{.fun get_confidence_interval}](infer::get_confidence_interval)} \\
         for more details.',
        call = call
      )
    }

    if (
      (attr(x, "distribution") == "t" &&
        !attr(point_estimate, "stat") %in% c("mean", "diff in means")) ||
        (attr(x, "distribution") == "norm" &&
          !attr(point_estimate, "stat") %in% c("prop", "diff in props"))
    ) {
      cli_abort(
        'Confidence intervals using a `{attr(x, "dist_")}` distribution for \\
         `stat = {attr(point_estimate, "stat")}` are not implemented.',
        call = call
      )
    }
  }
}

make_ci_df <- function(ci_vec) {
  tibble::tibble(lower_ci = ci_vec[[1]], upper_ci = ci_vec[[2]])
}


================================================
FILE: R/get_p_value.R
================================================
#' Compute p-value
#'
#' @description
#'
#' Compute a p-value from a null distribution and observed statistic.
#'
#' Learn more in `vignette("infer")`.
#'
#' @param x A null distribution. For simulation-based inference, a data frame
#'   containing a distribution of [calculate()]d statistics
#'   or [`fit()`][fit.infer()]ted coefficient estimates. This object should
#'   have been passed to [generate()] before being supplied or
#'   [calculate()] to [`fit()`][fit.infer()]. For theory-based inference,
#'   the output of [assume()].
#' @param obs_stat A data frame containing the observed statistic (in a
#'   [calculate()]-based workflow) or observed fit (in a
#'   [`fit()`][fit.infer()]-based workflow). This object is likely the output
#'   of [calculate()] or [`fit()`][fit.infer()] and need not
#'   to have been passed to [generate()].
#' @param direction A character string. Options are `"less"`, `"greater"`, or
#'   `"two-sided"`. Can also use `"left"`, `"right"`, `"both"`,
#'   `"two_sided"`, or `"two sided"`, `"two.sided"`.
#'
#' @return A [tibble][tibble::tibble] containing the following columns:
#'
#' \itemize{
#'   \item `term`: The explanatory variable (or intercept) in question. Only
#'     supplied if the input had been previously passed to [`fit()`][fit.infer()].
#'   \item `p_value`: A value in \[0, 1\] giving the probability that a
#'     statistic/coefficient as or more extreme than the observed
#'     statistic/coefficient would occur if the null hypothesis were true.
#' }
#'
#'
#' @section Aliases:
#' `get_pvalue()` is an alias of `get_p_value()`.
#' `p_value` is a deprecated alias of `get_p_value()`.
#'
#' @section Zero p-value:
#' Though a true p-value of 0 is impossible, `get_p_value()` may return 0 in
#' some cases. This is due to the simulation-based nature of the \{infer\}
#' package; the output of this function is an approximation based on
#' the number of `reps` chosen in the `generate()` step. When the observed
#' statistic is very unlikely given the null hypothesis, and only a small
#' number of `reps` have been generated to form a null distribution,
#' it is possible that the observed statistic will be more extreme than
#' every test statistic generated to form the null distribution, resulting
#' in an approximate p-value of 0. In this case, the true p-value is a small
#' value likely less than `3/reps` (based on a poisson approximation).
#'
#' In the case that a p-value of zero is reported, a warning message will be
#' raised to caution the user against reporting a p-value exactly equal to 0.
#'
#'
#' @examples
#'
#' # using a simulation-based null distribution ------------------------------
#'
#' # find the point estimate---mean number of hours worked per week
#' point_estimate <- gss |>
#'   specify(response = hours) |>
#'   calculate(stat = "mean")
#'
#' # starting with the gss dataset
#' gss |>
#'   # ...we're interested in the number of hours worked per week
#'   specify(response = hours) |>
#'   # hypothesizing that the mean is 40
#'   hypothesize(null = "point", mu = 40) |>
#'   # generating data points for a null distribution
#'   generate(reps = 1000, type = "bootstrap") |>
#'   # finding the null distribution
#'   calculate(stat = "mean") |>
#    # calculate the p-value for the point estimate
#'   get_p_value(obs_stat = point_estimate, direction = "two-sided")
#'
#' # using a theoretical null distribution -----------------------------------
#'
#' # calculate the observed statistic
#' obs_stat <- gss |>
#'   specify(response = hours) |>
#'   hypothesize(null = "point", mu = 40) |>
#'   calculate(stat = "t")
#'
#' # define a null distribution
#' null_dist <- gss |>
#'   specify(response = hours) |>
#'   assume("t")
#'
#' # calculate a p-value
#' get_p_value(null_dist, obs_stat, direction = "both")
#'
#' # using a model fitting workflow -----------------------------------------
#'
#' # fit a linear model predicting number of hours worked per
#' # week using respondent age and degree status.
#' observed_fit <- gss |>
#'   specify(hours ~ age + college) |>
#'   fit()
#'
#' observed_fit
#'
#' # fit 100 models to resamples of the gss dataset, where the response
#' # `hours` is permuted in each. note that this code is the same as
#' # the above except for the addition of the `generate` step.
#' null_fits <- gss |>
#'   specify(hours ~ age + college) |>
#'   hypothesize(null = "independence") |>
#'   generate(reps = 100, type = "permute") |>
#'   fit()
#'
#' null_fits
#'
#' get_p_value(null_fits, obs_stat = observed_fit, direction = "two-sided")
#'
#' # more in-depth explanation of how to use the infer package
#' \dontrun{
#' vignette("infer")
#' }
#'
#' @name get_p_value
#' @export
get_p_value <- function(x, obs_stat, direction) {
  UseMethod("get_p_value", x)
}

#' @rdname get_p_value
#' @family auxillary functions
#' @method get_p_value default
#' @export
get_p_value.default <- function(x, obs_stat, direction) {
  check_type(x, is.data.frame)
  if (!is_generated(x) & is_hypothesized(x)) {
    cli_abort(c(
      "Theoretical p-values are not yet supported. ",
      i = "`x` should be the result of calling {.fun generate}."
    ))
  }
  check_for_nan(x, "get_p_value")
  check_direction(direction)

  if (is_fitted(x)) {
    # check that x and obs stat reference the same variables
    check_mlr_x_and_obs_stat(
      x,
      obs_stat,
      "get_p_value",
      "obs_stat"
    )

    # split up x and obs_stat by term
    term_data <- x |>
      dplyr::ungroup() |>
      dplyr::group_by(term) |>
      dplyr::group_split() |>
      purrr::map(copy_attrs, x)

    term_obs_stats <- obs_stat |>
      dplyr::ungroup() |>
      dplyr::group_by(term) |>
      dplyr::group_split()

    # calculate the p value for each term and then add the term column back in
    purrr::map2_dfr(
      term_data,
      purrr::map(term_obs_stats, purrr::pluck, "estimate"),
      simulation_based_p_value,
      direction = direction
    ) |>
      dplyr::mutate(
        term = purrr::map_chr(term_obs_stats, purrr::pluck, "term"),
        .before = dplyr::everything()
      )
  } else {
    simulation_based_p_value(x = x, obs_stat = obs_stat, direction = direction)
  }
}

#' @rdname get_p_value
#' @export
get_pvalue <- get_p_value

#' @rdname get_p_value
#' @method get_p_value infer_dist
#' @export
get_p_value.infer_dist <- function(x, obs_stat, direction) {
  # check the null hypotheses attached to x and obs_stat
  check_hypotheses_align(x, obs_stat)

  # parse the distribution function
  dist_fn <- paste0("p", attr(x, "distribution"))

  # translate the direction argument
  dir <- norm_direction(direction)

  lower_tail <- switch(
    dir,
    `left` = TRUE,
    `right` = FALSE,
    `both` = TRUE
  )

  # supply everything to the base R distribution function
  res <- do.call(
    dist_fn,
    c(
      list(q = as.numeric(obs_stat), lower.tail = lower_tail),
      process_df(attr(x, "df"))
    )
  )

  if (dir == "both") {
    res <- min(res, 1 - res) * 2
  }

  tibble::tibble(p_value = res)
}

simulation_based_p_value <- function(
  x,
  obs_stat,
  direction,
  call = caller_env()
) {
  check_x_vs_obs_stat(x, obs_stat, call = call)
  obs_stat <- check_obs_stat(obs_stat)

  # x[[ncol(x)]] pulls out the stat or estimate column
  if (direction %in% c("less", "left")) {
    pval <- left_p_value(x[[ncol(x)]], obs_stat)
  } else if (direction %in% c("greater", "right")) {
    pval <- right_p_value(x[[ncol(x)]], obs_stat)
  } else {
    pval <- two_sided_p_value(x[[ncol(x)]], obs_stat)
  }

  if (abs(pval) < 1e-16) {
    cli_warn(c(
      "Please be cautious in reporting a p-value of 0. This result is an \\
       approximation based on the number of `reps` chosen in the {.fun generate} step.",
      i = "See {.help [{.fun get_p_value}](infer::get_p_value)} for more information."
    ))
  }

  tibble::tibble(p_value = pval)
}

left_p_value <- function(vec, obs_stat) {
  mean(vec <= obs_stat)
}

right_p_value <- function(vec, obs_stat) {
  mean(vec >= obs_stat)
}

two_sided_p_value <- function(vec, obs_stat) {
  left_pval <- left_p_value(vec, obs_stat)
  right_pval <- right_p_value(vec, obs_stat)
  raw_res <- 2 * min(left_pval, right_pval)

  min(raw_res, 1)
}

check_hypotheses_align <- function(x, obs_stat) {
  if (
    is_hypothesized(x) &&
      is_hypothesized(obs_stat) &&
      any(attr(x, "params") != attr(obs_stat, "params"))
  ) {
    cli_warn(
      "`x` and `obs_stat` were generated using different null hypotheses. \\
        This workflow is untested and results may not mean what you think \\
        they mean."
    )
  }
}

check_x_vs_obs_stat <- function(x, obs_stat, call = caller_env()) {
  # check if x and obs_stat might have been mistakenly supplied
  # in the reverse order
  if (
    is_generated(obs_stat) &&
      !is_generated(x)
  ) {
    cli_abort(
      c(
        "It seems like the `obs_stat` argument has been passed to `get_p_value()` \\
       as the first argument when `get_p_value()` expects `x`, a distribution \\
       of statistics or coefficient estimates, as the first argument. ",
        i = "Have you mistakenly switched the order of `obs_stat` and `x`?"
      ),
      call = call
    )
  }

  invisible(TRUE)
}

# which_distribution <- function(x, theory_type, obs_stat, direction){
#
#   param <- attr(x, "distr_param")
#   if(!is.null(attr(x, "distr_param2")))
#     param2 <- attr(x, "distr_param2")
#
#   if(theory_type == "Two sample t")
#     return(
#       pt(q = obs_stat,
#          df = param,
#          lower.tail = set_lower_tail(direction)
#         )
#     )
# }

#theory_t_pvalue <-

# set_lower_tail <- function(direction){
#   if(direction %in% c("greater", "right"))
#     lower_tail <- FALSE
#   else
#     lower_tail <- TRUE
#
#   lower_tail
# }


================================================
FILE: R/gss.R
================================================
#' Subset of data from the General Social Survey (GSS).
#'
#' The General Social Survey is a high-quality survey which gathers data on
#' American society and opinions, conducted since 1972. This data set is a
#' sample of 500 entries from the GSS, spanning years 1973-2018,
#' including demographic markers and some
#' economic variables. Note that this data is included for demonstration only,
#' and should not be assumed to provide accurate estimates relating to the GSS.
#' However, due to the high quality of the GSS, the unweighted data will
#' approximate the weighted data in some analyses.
#' @format A tibble with 500 rows and 11 variables:
#' \describe{
#'   \item{year}{year respondent was surveyed}
#'   \item{age}{age at time of survey, truncated at 89}
#'   \item{sex}{respondent's sex (self-identified)}
#'   \item{college}{whether on not respondent has a college degree, including
#'   junior/community college}
#'   \item{partyid}{political party affiliation}
#'   \item{hompop}{number of persons in household}
#'   \item{hours}{number of hours worked in week before survey, truncated at 89}
#'   \item{income}{total family income}
#'   \item{class}{subjective socioeconomic class identification}
#'   \item{finrela}{opinion of family income}
#'   \item{weight}{survey weight}
#' }
#' @source \url{https://gss.norc.org}
"gss"


================================================
FILE: R/hypothesize.R
================================================
#' Declare a null hypothesis
#'
#' @description
#'
#' Declare a null hypothesis about variables selected in [specify()].
#'
#' Learn more in `vignette("infer")`.
#'
#' @param x A data frame that can be coerced into a [tibble][tibble::tibble].
#' @param null The null hypothesis. Options include `"independence"`,
#'   `"point"`, and `"paired independence"`.
#' \itemize{
#'   \item `independence`: Should be used with both a `response` and `explanatory`
#'   variable. Indicates that the values of the specified `response` variable
#'   are independent of the associated values in `explanatory`.
#'   \item `point`: Should be used with only a `response` variable. Indicates
#'   that a point estimate based on the values in `response` is associated
#'   with a parameter. Sometimes requires supplying one of `p`, `mu`, `med`, or
#'   `sigma`.
#'   \item `paired independence`: Should be used with only a `response` variable
#'   giving the pre-computed difference between paired observations. Indicates
#'   that the order of subtraction between paired values does not affect the
#'   resulting distribution.
#' }
#' @param p The true proportion of successes (a number between 0 and 1). To be used with point null hypotheses when the specified response
#' variable is categorical.
#' @param mu The true mean (any numerical value). To be used with point null
#' hypotheses when the specified response variable is continuous.
#' @param med The true median (any numerical value). To be used with point null
#' hypotheses when the specified response variable is continuous.
#' @param sigma The true standard deviation (any numerical value). To be used with
#' point null hypotheses.
#'
#' @return A tibble containing the response (and explanatory, if specified)
#'   variable data with parameter information stored as well.
#'
#' @examples
#' # hypothesize independence of two variables
#' gss |>
#'  specify(college ~ partyid, success = "degree") |>
#'  hypothesize(null = "independence")
#'
#' # hypothesize a mean number of hours worked per week of 40
#' gss |>
#'   specify(response = hours) |>
#'   hypothesize(null = "point", mu = 40)
#'
#' # more in-depth explanation of how to use the infer package
#' \dontrun{
#' vignette("infer")
#' }
#'
#' @importFrom purrr compact
#' @family core functions
#' @export
hypothesize <- function(
  x,
  null,
  p = NULL,
  mu = NULL,
  med = NULL,
  sigma = NULL
) {
  # Check arguments
  if (missing(null)) {
    null <- NA
  }
  null <- match_null_hypothesis(null)
  hypothesize_checks(x, null)

  attr(x, "null") <- null
  attr(x, "hypothesized") <- TRUE

  dots <- compact(list(p = p, mu = mu, med = med, sigma = sigma))

  # Set parameters and determine appropriate generation type
  switch(
    null,
    independence = {
      params <- sanitize_hypothesis_params_independence(dots)
      attr(x, "type") <- "permute"
    },
    point = {
      params <- sanitize_hypothesis_params_point(dots, x)
      attr(x, "params") <- unlist(params)

      if (!is.null(params$p)) {
        attr(x, "type") <- "draw"
      } else {
        # Check one proportion test set up correctly
        if (is.factor(response_variable(x))) {
          cli_abort(
            'Testing one categorical variable requires `p` to be used as a \\
             parameter.'
          )
        }
        attr(x, "type") <- "bootstrap"
      }
    },
    `paired independence` = {
      params <- sanitize_hypothesis_params_paired_independence(dots)
      attr(x, "type") <- "permute"
    }
  )

  res <- append_infer_class(tibble::as_tibble(x))

  copy_attrs(to = res, from = x)
}

#' @rdname hypothesize
#' @export
hypothesise <- hypothesize

hypothesize_checks <- function(x, null, call = caller_env()) {
  if (!inherits(x, "data.frame")) {
    cli_abort("x must be a data.frame or tibble", call = call)
  }

  if ((null == "independence") && !has_explanatory(x)) {
    cli_abort(
      'Please {.fun specify} an explanatory and a response variable when \\
       testing a null hypothesis of `"independence"`.',
      call = call
    )
  }

  if (null == "paired independence" && has_explanatory(x)) {
    cli_abort(
      c(
        'Please {.fun specify} only a response variable when \\
           testing a null hypothesis of `"paired independence"`.',
        "i" = 'The supplied response variable should be the \\
                 pre-computed difference between paired observations.'
      ),
      call = call
    )
  }
}

match_null_hypothesis <- function(null, call = caller_env()) {
  null_hypothesis_types <- c("point", "independence", "paired independence")

  if (length(null) != 1) {
    cli_abort(
      'You should specify exactly one type of null hypothesis.',
      call = call
    )
  }

  i <- pmatch(null, null_hypothesis_types)

  if (is.na(i)) {
    cli_abort(
      '`null` should be either "point", "independence", or "paired independence".',
      call = call
    )
  }

  null_hypothesis_types[i]
}

sanitize_hypothesis_params_independence <- function(dots) {
  if (length(dots) > 0) {
    cli_warn(
      "Parameter values should not be specified when testing that two \\
       variables are independent."
    )
  }

  NULL
}

sanitize_hypothesis_params_point <- function(dots, x, call = caller_env()) {
  if (length(dots) != 1) {
    cli_abort(
      "You must specify exactly one of `p`, `mu`, `med`, or `sigma`.",
      call = call
    )
  }

  if (!is.null(dots$p)) {
    dots$p <- sanitize_hypothesis_params_proportion(dots$p, x, call = call)
  }

  dots
}

sanitize_hypothesis_params_proportion <- function(p, x, call = caller_env()) {
  eps <- if (capabilities("long.double")) {
    sqrt(.Machine$double.eps)
  } else {
    0.01
  }

  if (anyNA(p)) {
    cli_abort(
      '`p` should not contain missing values.',
      call = call
    )
  }

  if (any(p < 0 | p > 1)) {
    cli_abort(
      '`p` should only contain values between zero and one.',
      call = call
    )
  }

  if (length(p) == 1) {
    if (!has_attr(x, "success")) {
      cli_abort(
        "A point null regarding a proportion requires that `success` \\
          be indicated in `specify()`.",
        call = call
      )
    }

    p <- c(p, 1 - p)
    names(p) <- get_success_then_response_levels(x)
  } else {
    if (sum(p) < 1 - eps | sum(p) > 1 + eps) {
      cli_abort(
        "Make sure the hypothesized values for the `p` parameters sum to 1. \\
          Please try again.",
        call = call
      )
    }
  }

  p
}

sanitize_hypothesis_params_paired_independence <- function(dots) {
  if (length(dots) > 0) {
    cli_warn(
      "Parameter values should not be specified when testing paired independence."
    )
  }

  NULL
}


================================================
FILE: R/infer.R
================================================
#' infer: a grammar for statistical inference
#'
#' The objective of this package is to perform statistical inference using a
#' grammar that illustrates the underlying concepts and a format that coheres
#' with the tidyverse.
#'
#' For an overview of how to use the core functionality, see `vignette("infer")`
#'
#'
#' @docType package
#' @name infer
"_PACKAGE"

#' @importFrom cli cli_abort cli_warn cli_inform qty no

## quiets concerns of R CMD check re: the .'s that appear in pipelines
## From Jenny Bryan's googlesheets package
if (getRversion() >= "2.15.1") {
  utils::globalVariables(
    c(
      "prop",
      "stat",
      "value",
      "x",
      "y",
      "..density..",
      "statistic",
      ".",
      "parameter",
      "p.value",
      "xmin",
      "x_min",
      "xmax",
      "x_max",
      "density",
      "denom",
      "diff_prop",
      "group_num",
      "n1",
      "n2",
      "num_suc",
      "p_hat",
      "total_suc",
      "explan",
      "probs",
      "conf.low",
      "conf.high",
      "prop_1",
      "prop_2",
      "data",
      "setNames",
      "resp",
      "capture.output",
      "stats",
      "estimate",
      "any_of",
      "model",
      "term",
      "where",
      "hypothesis"
    )
  )
}


================================================
FILE: R/observe.R
================================================
#' Calculate observed statistics
#'
#' @description
#'
#' This function is a wrapper that calls [specify()], [hypothesize()], and
#' [calculate()] consecutively that can be used to calculate observed
#' statistics from data. [hypothesize()] will only be called if a point
#' null hypothesis parameter is supplied.
#'
#' Learn more in `vignette("infer")`.
#'
#' @inheritParams specify
#' @inheritParams hypothesize
#' @inheritParams calculate
#'
#' @return A 1-column tibble containing the calculated statistic `stat`.
#'
#' @inheritSection calculate Arbitrary test statistics
#'
#' @examples
#' # calculating the observed mean number of hours worked per week
#' gss |>
#'   observe(hours ~ NULL, stat = "mean")
#'
#' # equivalently, calculating the same statistic with the core verbs
#' gss |>
#'   specify(response = hours) |>
#'   calculate(stat = "mean")
#'
#' # calculating a t statistic for hypothesized mu = 40 hours worked/week
#' gss |>
#'   observe(hours ~ NULL, stat = "t", null = "point", mu = 40)
#'
#' # equivalently, calculating the same statistic with the core verbs
#' gss |>
#'   specify(response = hours) |>
#'   hypothesize(null = "point", mu = 40) |>
#'   calculate(stat = "t")
#'
#' # similarly for a difference in means in age based on whether
#' # the respondent has a college degree
#' observe(
#'   gss,
#'   age ~ college,
#'   stat = "diff in means",
#'   order = c("degree", "no degree")
#' )
#'
#' # equivalently, calculating the same statistic with the core verbs
#' gss |>
#'   specify(age ~ college) |>
#'   calculate("diff in means", order = c("degree", "no degree"))
#'
#' # for a more in-depth explanation of how to use the infer package
#' \dontrun{
#' vignette("infer")
#' }
#'
#' @family wrapper functions
#' @family functions for calculating observed statistics
#' @export
observe <- function(
  x,
  # specify arguments
  formula,
  response = NULL,
  explanatory = NULL,
  success = NULL,
  # hypothesize arguments
  null = NULL,
  p = NULL,
  mu = NULL,
  med = NULL,
  sigma = NULL,
  # calculate arguments
  stat = c(
    "mean",
    "median",
    "sum",
    "sd",
    "prop",
    "count",
    "diff in means",
    "diff in medians",
    "diff in props",
    "Chisq",
    "F",
    "slope",
    "correlation",
    "t",
    "z",
    "ratio of props",
    "odds ratio"
  ),
  order = NULL,
  ...
) {
  # use hypothesize() if appropriate (or needed to pass an informative
  # message/warning). otherwise, pipe directly to calculate().
  if (!all(sapply(list(p, mu, med, sigma), is.null))) {
    hypothesize_fn <- hypothesize
  } else {
    hypothesize_fn <- function(x, ...) {
      x
    }
  }

  # pass arguments on to core verbs
  res <- 
    specify(
    x = x,
    formula = formula,
    response = {{ response }},
    explanatory = {{ explanatory }},
    success = success
    )
  
  hypothesize_fn(
    res,
    null = if (has_explanatory(res)) {
      "independence"
    } else {
      "point"
    },
    p = p,
    mu = mu,
    med = med,
    sigma = sigma
  ) |>
  calculate(
    stat = stat,
    order = order,
    ...
  )
}


================================================
FILE: R/pipe.R
================================================
#' Pipe
#'
#' Like \{dplyr\}, \{infer\} also uses the pipe (\code{|>}) function
#' from \code{magrittr} to turn function composition into a series of
#' iterative statements.
#'
#' @param lhs,rhs Inference functions and the initial data frame.
#'
#' @importFrom magrittr %>%
#' @name %>%
#' @rdname pipe
#' @export
NULL


================================================
FILE: R/print_methods.R
================================================
#' Print methods
#'
#' @param x An object of class `infer`, i.e. output from [specify()] or
#'   [hypothesize()], or of class `infer_layer`, i.e. output from
#'   [shade_p_value()] or [shade_confidence_interval()].
#' @param ... Arguments passed to methods.
#' @importFrom glue glue_collapse glue
#'
#' @rdname print.infer
#' @export
print.infer <- function(x, ...) {
  attrs <- names(attributes(x))
  header <- character(3)
  if ("response" %in% attrs) {
    header[1] <- glue(
      'Response: {response_name(x)} ({attr(x, "response_type")})',
      .null = "NULL"
    )
    if ("explanatory" %in% attrs) {
      header[2] <- glue(
        'Explanatory: {paste0(paste0(explanatory_name(x), " (",
        attr(x, "explanatory_type"), ")"), collapse = ", ")}',
        .null = "NULL"
      )
    }
  }
  if ("null" %in% attrs) {
    header[3] <- glue('Null Hypothesis: {attr(x, "null")}', .null = "NULL")
  }

  cat(glue::glue_collapse(
    header[header != ""],
    width = cli::console_width(),
    sep = "\n"
  ))
  cat("\n")

  NextMethod()
}

#' @rdname print.infer
#' @export
print.infer_layer <- function(x, ...) {
  cat(x)
}

#' @rdname print.infer
#' @export
print.infer_dist <- function(x, ...) {
  cat(x)
}


================================================
FILE: R/rep_sample_n.R
================================================
#' Perform repeated sampling
#'
#' @description
#'
#' These functions extend the functionality of [dplyr::sample_n()] and
#' [dplyr::slice_sample()] by allowing for repeated sampling of data.
#' This operation is especially helpful while creating sampling
#' distributions—see the examples below!
#'
#' @param tbl,.data Data frame of population from which to sample.
#' @param size,n,prop `size` and `n` refer to the sample size of each sample.
#' The `size` argument to `rep_sample_n()` is required, while in
#' `rep_slice_sample()` sample size defaults to 1 if not specified. `prop`, an
#' argument to `rep_slice_sample()`, refers to the proportion of rows to sample
#' in each sample, and is rounded down in the case that `prop * nrow(.data)` is
#' not an integer. When using `rep_slice_sample()`, please only supply one of
#' `n` or `prop`.
#' @param replace Should samples be taken with replacement?
#' @param reps Number of samples to take.
#' @param prob,weight_by A vector of sampling weights for each of the rows in
#' `.data`—must have length equal to `nrow(.data)`. For `weight_by`, this
#' may also be an unquoted column name in `.data`.
#'
#' @details
#'
#' `rep_sample_n()` and `rep_slice_sample()` are designed to behave similar to
#' their dplyr counterparts. As such, they have at least the following
#' differences:
#' - In case `replace = FALSE` having `size` bigger than number of data rows in
#' `rep_sample_n()` will give an error. In `rep_slice_sample()` having such `n`
#' or `prop > 1` will give warning and output sample size will be set to number
#' of rows in data.
#'
#' Note that the [dplyr::sample_n()] function  has been superseded by
#' [dplyr::slice_sample()].
#'
#' @return A tibble of size `reps * n` rows corresponding to `reps`
#'   samples of size `n` from `.data`, grouped by `replicate`.
#'
#' @examples
#' library(dplyr)
#' library(ggplot2)
#' library(tibble)
#'
#' # take 1000 samples of size n = 50, without replacement
#' slices <- gss |>
#'   rep_slice_sample(n = 50, reps = 1000)
#'
#' slices
#'
#' # compute the proportion of respondents with a college
#' # degree in each replicate
#' p_hats <- slices |>
#'   group_by(replicate) |>
#'   summarize(prop_college = mean(college == "degree"))
#'
#' # plot sampling distribution
#' ggplot(p_hats, aes(x = prop_college)) +
#'   geom_density() +
#'   labs(
#'     x = "p_hat", y = "Number of samples",
#'     title = "Sampling distribution of p_hat"
#'   )
#'
#' # sampling with probability weights. Note probabilities are automatically
#' # renormalized to sum to 1
#' df <- tibble(
#'   id = 1:5,
#'   letter = factor(c("a", "b", "c", "d", "e"))
#' )
#'
#' rep_slice_sample(df, n = 2, reps = 5, weight_by = c(.5, .4, .3, .2, .1))
#'
#' # alternatively, pass an unquoted column name in `.data` as `weight_by`
#' df <- df |> mutate(wts = c(.5, .4, .3, .2, .1))
#'
#' rep_slice_sample(df, n = 2, reps = 5, weight_by = wts)
#' @export
rep_sample_n <- function(tbl, size, replace = FALSE, reps = 1, prob = NULL) {
  check_type(tbl, is.data.frame)
  check_type(size, is_single_number, "single non-negative number", min_val = 0)
  check_type(replace, is_truefalse, "TRUE or FALSE")
  check_type(
    reps,
    is_single_number,
    "single number not less than 1",
    min_val = 1
  )
  check_type(
    prob,
    ~ is.numeric(.) && (length(.) == nrow(tbl)),
    glue::glue("numeric vector with length `nrow(tbl)` = {nrow(tbl)}"),
    allow_null = TRUE
  )

  # In `dplyr::sample_n()` `size` can't be more than number of rows in data
  notify_extra_size(size, tbl, replace, notify_type = "sample_n")

  make_replicate_tbl(
    tbl = tbl,
    size = size,
    replace = replace,
    prob = prob,
    reps = reps
  )
}

#' @rdname rep_sample_n
#' @export
rep_slice_sample <- function(
  .data,
  n = NULL,
  prop = NULL,
  replace = FALSE,
  weight_by = NULL,
  reps = 1
) {
  check_type(.data, is.data.frame)
  check_type(
    n,
    is_single_number,
    "single non-negative number",
    allow_null = TRUE,
    min_val = 0
  )
  check_type(
    prop,
    is_single_number,
    "single non-negative number",
    allow_null = TRUE,
    min_val = 0
  )
  check_type(replace, is_truefalse, "TRUE or FALSE")
  eval_weight_by <- try(rlang::eval_tidy(weight_by), silent = TRUE)
  if (inherits(eval_weight_by, "try-error")) {
    weight_by <- rlang::enquo(weight_by)
    check_cols(.data, weight_by, "permute", FALSE, "weight_by")
    weight_by <- .data[[rlang::as_name(weight_by)]]
  }
  check_type(
    weight_by,
    ~ is.numeric(.) && (length(.) == nrow(.data)),
    glue::glue(
      "a numeric vector with length `nrow(.data)` = {nrow(.data)} \\
                 or an unquoted column name"
    ),
    allow_null = TRUE
  )
  check_type(
    reps,
    is_single_number,
    "single number not less than 1",
    min_val = 1
  )

  # Compute sample size based on `n` and `prop`
  size <- make_slice_size(n = n, prop = prop, n_total = nrow(.data))

  # In `dplyr::slice_sample()` asked sample size is allowed to be bigger than
  # number of rows in data. In that case (at least currently) sample size is
  # silently replaced to be number of rows. Here we give a warning.
  notify_extra_size(size, .data, replace, notify_type = "slice_sample")

  make_replicate_tbl(
    tbl = .data,
    size = size,
    replace = replace,
    prob = weight_by,
    reps = reps
  )
}

make_replicate_tbl <- function(tbl, size, replace, prob, reps) {
  # Generate row indexes for every future replicate (this way it respects
  # possibility of `replace = FALSE`)
  n <- nrow(tbl)

  if (!replace) {
    idx_list <- replicate(
      reps,
      sample_int(n, size, replace = FALSE, prob = prob),
      simplify = FALSE
    )
  } else {
    idx_list <- sample_int(n, size * reps, replace = TRUE, prob = prob)
    idx_list <- vctrs::vec_chop(idx_list, sizes = rep(size, reps))
  }

  # Get actual sample size which can differ from `size` (currently if it is
  # bigger than number of rows in `tbl` inside `rep_slice_sample()`)
  sample_size <- length(idx_list[[1]])
  i <- unlist(idx_list)

  res <- vctrs::vec_slice(tbl, i)
  res <-
    dplyr::bind_cols(
      tibble::new_tibble(list(
        replicate = rep(seq_len(reps), each = sample_size)
      )),
      res
    )
  res <- group_by_replicate(res, reps = reps, n = sample_size)
  copy_attrs(res, tbl)
}

notify_extra_size <- function(
  size,
  tbl,
  replace,
  notify_type,
  call = caller_env()
) {
  if (!replace && (size > nrow(tbl))) {
    msg <- glue::glue(
      "Asked sample size ({size}) is bigger than ",
      "number of rows in data ({nrow(tbl)}) while `replace` is FALSE"
    )
    switch(
      notify_type,
      sample_n = cli_abort("{msg}. Use `replace = TRUE`.", call = call),
      slice_sample = cli_warn("{msg}. Using number of rows as sample size.")
    )
  }

  TRUE
}

# Modified code from https://github.com/tidyverse/dplyr/blob/master/R/slice.R
# (at commit 0f29aa4)
sample_int <- function(n, size, replace = FALSE, prob = NULL) {
  if (!replace) {
    # If `replace` is `FALSE`, allow `size` to be bigger than `n` by silently
    # replacing it with `n`
    size <- min(size, n)
  }

  if (size == 0L) {
    integer(0)
  } else {
    sample.int(n, size, prob = prob, replace = replace)
  }
}

make_slice_size <- function(n, prop, n_total, call = caller_env()) {
  if (is.null(n)) {
    if (is.null(prop)) {
      # By default return size 1
      1L
    } else {
      as.integer(n_total * prop)
    }
  } else {
    if (is.null(prop)) {
      n
    } else {
      cli_abort(
        "Please supply exactly one of the `n` or `prop` arguments.",
        call = call
      )
    }
  }
}


================================================
FILE: R/set_params.R
================================================
#' To determine which theoretical distribution to fit (if any)
#'
#' @param x A data frame that can be coerced into a [tibble][tibble::tibble].
#'
#' @noRd
set_params <- function(x) {
  attr(x, "theory_type") <- NULL

  if (has_response(x)) {
    num_response_levels <- length(unique(response_variable(x)))

    check_factor_levels(
      response_variable(x),
      "response",
      response_name(x)
    )
  }

  if (is_mlr(x)) {
    return(x)
  }

  if (has_explanatory(x)) {
    num_explanatory_levels <- length(unique(explanatory_variable(x)))

    check_factor_levels(
      explanatory_variable(x),
      "explanatory",
      explanatory_name(x)
    )
  }

  # One variable
  if (
    has_response(x) &&
      !has_explanatory(x) &&
      has_attr(x, "response_type") &&
      !has_attr(x, "explanatory_type")
  ) {
    # One mean
    if (attr(x, "response_type") %in% c("integer", "numeric")) {
      attr(x, "theory_type") <- "One sample t"
      attr(x, "distr_param") <- stats::t.test(
        response_variable(x)
      )[["parameter"]]
      attr(x, "type") <- "bootstrap"
    } else if (
      # One prop
      (attr(x, "response_type") == "factor") && (num_response_levels == 2)
    ) {
      # No parameters since standard normal
      attr(x, "theory_type") <- "One sample prop z"
      # Changed to `"draw"` when `p` provided in `hypothesize()`
      attr(x, "type") <- "bootstrap"
    } else {
      attr(x, "theory_type") <- "Chi-square Goodness of Fit"
      attr(x, "distr_param") <- num_response_levels - 1
      attr(x, "type") <- "draw"
    }
  }

  # Two variables
  if (
    has_response(x) &&
      has_explanatory(x) &
      has_attr(x, "response_type") &&
      has_attr(x, "explanatory_type")
  ) {
    attr(x, "type") <- "bootstrap"

    # Response is numeric, explanatory is categorical
    if (
      (attr(x, "response_type") %in% c("integer", "numeric")) &
        (attr(x, "explanatory_type") == "factor")
    ) {
      # Two sample means (t distribution)
      if (num_explanatory_levels == 2) {
        attr(x, "theory_type") <- "Two sample t"
        # Keep track of Satterthwaite degrees of freedom since lost when
        # in aggregation w/ calculate()/generate()
        attr(x, "distr_param") <- stats::t.test(
          response_variable(x) ~ explanatory_variable(x)
        )[["parameter"]]
      } else {
        # >2 sample means (F distribution)
        attr(x, "theory_type") <- "ANOVA"
        # Get numerator and denominator degrees of freedom
        degrees <- stats::anova(stats::aov(
          response_variable(x) ~ explanatory_variable(x)
        ))$Df
        attr(x, "distr_param") <- degrees[1]
        attr(x, "distr_param2") <- degrees[2]
      }
    }

    # Response is categorical, explanatory is categorical
    if (
      (attr(x, "response_type") == "factor") &
        (attr(x, "explanatory_type") == "factor")
    ) {
      attr(x, "type") <- "bootstrap"

      # Two sample proportions (z distribution)
      # Parameter(s) not needed since standard normal
      if (
        (num_response_levels == 2) &
          (num_explanatory_levels == 2)
      ) {
        attr(x, "theory_type") <- "Two sample props z"
      } else {
        # >2 sample proportions (chi-square test of indep)
        attr(x, "theory_type") <- "Chi-square test of indep"
        attr(x, "distr_param") <- suppressWarnings(
          stats::chisq.test(
            table(response_variable(x), explanatory_variable(x))
          )$parameter
        )
      }
    }

    # Response is numeric, explanatory is numeric
    if (
      (attr(x, "response_type") %in% c("integer", "numeric")) &
        (attr(x, "explanatory_type") %in% c("integer", "numeric"))
    ) {
      response_string <- response_name(x)
      explanatory_string <- explanatory_name(x)
      attr(x, "theory_type") <- "Slope/correlation with t"
      attr(x, "distr_param") <- nrow(x) - 2
    }
  }

  x
}

check_factor_levels <- function(x, type, name) {
  if (is.factor(x)) {
    unused <- setdiff(levels(x), unique(x))

    if (length(unused) > 0 && !suppress_infer_messages()) {
      cli_inform(
        "Dropping unused factor levels {list(unused)} from the \\
         supplied {type} variable '{name}'."
      )
    }
  }
}


================================================
FILE: R/shade_confidence_interval.R
================================================
#' Add information about confidence interval
#'
#' @description
#'
#' `shade_confidence_interval()` plots a confidence interval region on top of
#' [visualize()] output. The output is a ggplot2 layer that can be added with
#' `+`. The function has a shorter alias, `shade_ci()`.
#'
#' Learn more in `vignette("infer")`.
#'
#' @param endpoints The lower and upper bounds of the interval to be plotted.
#'   Likely, this will be the output of [get_confidence_interval()].
#'   For [calculate()]-based workflows, this will be a 2-element vector
#'   or a `1 x 2` data frame containing the lower and upper values to be plotted.
#'   For [`fit()`][fit.infer()]-based workflows, a `(p + 1) x 3` data frame
#'   with columns `term`, `lower_ci`, and `upper_ci`, giving the upper and
#'   lower bounds for each regression term. For use in visualizations of
#'   [assume()] output, this must be the output of [get_confidence_interval()].
#' @param color A character or hex string specifying the color of the
#'   end points as a vertical lines on the plot.
#' @param fill A character or hex string specifying the color to shade the
#'   confidence interval. If `NULL` then no shading is actually done.
#' @param ... Other arguments passed along to ggplot2 functions.
#'
#' @return If added to an existing infer visualization, a ggplot2
#'   object displaying the supplied intervals on top of its corresponding
#'   distribution. Otherwise, an `infer_layer` list.
#'
#' @examples
#' # find the point estimate---mean number of hours worked per week
#' point_estimate <- gss |>
#'   specify(response = hours) |>
#'   calculate(stat = "mean")
#'
#' # ...and a bootstrap distribution
#' boot_dist <- gss |>
#'   # ...we're interested in the number of hours worked per week
#'   specify(response = hours) |>
#'   # generating data points
#'   generate(reps = 1000, type = "bootstrap") |>
#'   # finding the distribution from the generated data
#'   calculate(stat = "mean")
#'
#' # find a confidence interval around the point estimate
#' ci <- boot_dist |>
#'   get_confidence_interval(point_estimate = point_estimate,
#'                           # at the 95% confidence level
#'                           level = .95,
#'                           # using the standard error method
#'                           type = "se")
#'
#'
#' # and plot it!
#' boot_dist |>
#'   visualize() +
#'   shade_confidence_interval(ci)
#'
#' # or just plot the bounds
#' boot_dist |>
#'   visualize() +
#'   shade_confidence_interval(ci, fill = NULL)
#'
#' # you can shade confidence intervals on top of
#' # theoretical distributions, too---the theoretical
#' # distribution will be recentered and rescaled to
#' # align with the confidence interval
#' sampling_dist <- gss |>
#'   specify(response = hours) |>
#'   assume(distribution = "t")
#'
#' visualize(sampling_dist) +
#'   shade_confidence_interval(ci)
#'
#' \donttest{
#' # to visualize distributions of coefficients for multiple
#' # explanatory variables, use a `fit()`-based workflow
#'
#' # fit 1000 linear models with the `hours` variable permuted
#' null_fits <- gss |>
#'  specify(hours ~ age + college) |>
#'  hypothesize(null = "independence") |>
#'  generate(reps = 1000, type = "permute") |>
#'  fit()
#'
#' null_fits
#'
#' # fit a linear model to the observed data
#' obs_fit <- gss |>
#'   specify(hours ~ age + college) |>
#'   fit()
#'
#' obs_fit
#'
#' # get confidence intervals for each term
#' conf_ints <-
#'   get_confidence_interval(
#'     null_fits,
#'     point_estimate = obs_fit,
#'     level = .95
#'   )
#'
#' # visualize distributions of coefficients
#' # generated under the null
#' visualize(null_fits)
#'
#' # add a confidence interval shading layer to juxtapose
#' # the null fits with the observed fit for each term
#' visualize(null_fits) +
#'   shade_confidence_interval(conf_ints)
#' }
#'
#' # more in-depth explanation of how to use the infer package
#' \dontrun{
#' vignette("infer")
#' }
#'
#' @name shade_confidence_interval
NULL

#' @rdname shade_confidence_interval
#' @family visualization functions
#' @export
shade_confidence_interval <- function(
  endpoints,
  color = "mediumaquamarine",
  fill = "turquoise",
  ...
) {
  # since most of the logic for shading is in shade_confidence_interval_term, which
  # is only called by `+.gg`, we need to check for mistakenly piped inputs here
  check_for_piped_visualize(endpoints, color, fill)

  # store inputs in classed output that can passed to a `ggplot_add` method
  structure(
    "A confidence interval shading layer.",
    class = "infer_layer",
    fn = "shade_confidence_interval",
    endpoints = if (is.null(endpoints)) {
      NA
    } else {
      endpoints
    },
    color = color,
    fill = list(fill),
    dots = list(...)
  )
}

shade_confidence_interval_term <- function(
  plot,
  endpoints,
  color = "mediumaquamarine",
  fill = "turquoise",
  dots,
  call = rlang::call2("shade_confidence_interval")
) {
  if (all(is.na(endpoints))) {
    endpoints <- NULL
  }

  # argument checking
  endpoints <- impute_endpoints(endpoints, plot, call = call)
  check_shade_confidence_interval_args(color, fill)

  if (is.null(endpoints)) {
    return(plot)
  }

  res <- list()

  if (!is.null(fill)) {
    # Making extra step of precomputing arguments in order to have default value
    # of `alpha = 0.6` overwritable in `...`
    rect_args <- c_dedupl(
      # Not overwritable arguments
      list(
        data = data.frame(endpoints[1]),
        mapping = aes(
          xmin = endpoints[1],
          xmax = endpoints[2],
          ymin = 0,
          ymax = Inf
        ),
        fill = fill,
        inherit.aes = FALSE
      ),
      # Extra arguments
      dots,
      # Default arguments that might be replaced in `...`
      list(alpha = 0.6)
    )
    rect_layer <- do.call(ggplot2::geom_rect, rect_args)

    res <- c(res, list(rect_layer))
  }

  segment_args <- c_dedupl(
    list(
      data = data.frame(x = endpoints),
      mapping = aes(x = x, xend = x, y = 0, yend = Inf),
      color = color,
      inherit.aes = FALSE
    ),
    dots,
    list(linewidth = 2)
  )
  segment_layer <- do.call(ggplot2::geom_segment, segment_args)

  res <- c(res, list(segment_layer))

  if (inherits(plot[["plot_env"]][["data"]], "infer_dist")) {
    plot <-
      redraw_theory_layer(
        plot,
        mean_shift = attr(endpoints, "point_estimate"),
        sd_shift = attr(endpoints, "se")
      ) +
      ggplot2::labs(
        title = "Rescaled Theoretical Distribution",
        x = "stat"
      )
  }

  plot + res
}

#' @rdname shade_confidence_interval
#' @export
shade_ci <- shade_confidence_interval


================================================
FILE: R/shade_p_value.R
================================================
#' Shade histogram area beyond an observed statistic
#'
#' @description
#'
#' `shade_p_value()` plots a p-value region on top of
#' [visualize()] output. The output is a ggplot2 layer that can be added with
#' `+`. The function has a shorter alias, `shade_pvalue()`.
#'
#' Learn more in `vignette("infer")`.
#'
#' @param obs_stat The observed statistic or estimate. For
#'   [calculate()]-based workflows, this will be a 1-element numeric vector or
#'   a `1 x 1` data frame containing the observed statistic.
#'   For [`fit()`][fit.infer()]-based workflows, a `(p + 1) x 2` data frame
#'   with columns `term` and `estimate` giving the observed estimate for
#'   each term.
#' @param direction A string specifying in which direction the shading should
#'   occur. Options are `"less"`, `"greater"`, or `"two-sided"`. Can
#'   also give `"left"`, `"right"`, `"both"`, `"two_sided"`, `"two sided"`,
#'   or `"two.sided"`. If `NULL`, the function will not shade any area.
#' @param color A character or hex string specifying the color of the observed
#'   statistic as a vertical line on the plot.
#' @param fill A character or hex string specifying the color to shade the
#'   p-value region. If `NULL`, the function will not shade any area.
#' @param ... Other arguments passed along to ggplot2 functions.
#'   For expert use only.
#'
#' @return If added to an existing infer visualization, a ggplot2
#'   object displaying the supplied statistic on top of its corresponding
#'   distribution. Otherwise, an `infer_layer` list.
#'
#'
#' @examples
#' # find the point estimate---mean number of hours worked per week
#' point_estimate <- gss |>
#'   specify(response = hours) |>
#'   hypothesize(null = "point", mu = 40) |>
#'   calculate(stat = "t")
#'
#' # ...and a null distribution
#' null_dist <- gss |>
#'   # ...we're interested in the number of hours worked per week
#'   specify(response = hours) |>
#'   # hypothesizing that the mean is 40
#'   hypothesize(null = "point", mu = 40) |>
#'   # generating data points for a null distribution
#'   generate(reps = 1000, type = "bootstrap") |>
#'   # estimating the null distribution
#'   calculate(stat = "t")
#'
#' # shade the p-value of the point estimate
#' null_dist |>
#'   visualize() +
#'   shade_p_value(obs_stat = point_estimate, direction = "two-sided")
#'
#' # you can shade confidence intervals on top of
#' # theoretical distributions, too!
#' null_dist_theory <- gss |>
#'   specify(response = hours) |>
#'   assume(distribution = "t")
#'
#' null_dist_theory |>
#'   visualize() +
#'   shade_p_value(obs_stat = point_estimate, direction = "two-sided")
#'
#' \donttest{
#' # to visualize distributions of coefficients for multiple
#' # explanatory variables, use a `fit()`-based workflow
#'
#' # fit 1000 linear models with the `hours` variable permuted
#' null_fits <- gss |>
#'  specify(hours ~ age + college) |>
#'  hypothesize(null = "independence") |>
#'  generate(reps = 1000, type = "permute") |>
#'  fit()
#'
#' null_fits
#'
#' # fit a linear model to the observed data
#' obs_fit <- gss |>
#'   specify(hours ~ age + college) |>
#'   fit()
#'
#' obs_fit
#'
#' # visualize distributions of coefficients
#' # generated under the null
#' visualize(null_fits)
#'
#' # add a p-value shading layer to juxtapose the null
#' # fits with the observed fit for each term
#' visualize(null_fits) +
#'   shade_p_value(obs_fit, direction = "both")
#'
#' # the direction argument will be applied
#' # to the plot for each term
#' visualize(null_fits) +
#'   shade_p_value(obs_fit, direction = "left")
#' }
#'
#' # more in-depth explanation of how to use the infer package
#' \dontrun{
#' vignette("infer")
#' }
#'
#' @name shade_p_value
NULL

#' @rdname shade_p_value
#' @family visualization functions
#' @export
shade_p_value <- function(
  obs_stat,
  direction,
  color = "red2",
  fill = "pink",
  ...
) {
  # since most of the logic for p-value shading is in shade_p_value_term, which
  # is only called by `+.gg`, we need to check for mistakenly piped inputs here
  check_for_piped_visualize(obs_stat, direction, color, fill)

  # store inputs in classed output that can passed to a `ggplot_add` method
  structure(
    "A p-value shading layer.",
    class = "infer_layer",
    fn = "shade_p_value",
    obs_stat = if (is.null(obs_stat)) {
      NA
    } else {
      obs_stat
    },
    direction = if (is.null(direction)) {
      NA
    } else {
      direction
    },
    color = color,
    fill = list(fill),
    dots = list(...)
  )
}

#' @rdname shade_p_value
#' @export
shade_pvalue <- shade_p_value

shade_p_value_term <- function(
  plot,
  obs_stat,
  direction,
  color = "red2",
  fill = "pink",
  dots,
  call = rlang::call2("shade_p_value")
) {
  if (all(is.na(obs_stat))) {
    obs_stat <- NULL
  }

  if (all(is.na(direction))) {
    direction <- NULL
  }

  # argument checking
  obs_stat <- check_obs_stat(obs_stat, plot, call = call)
  check_shade_p_value_args(obs_stat, direction, color, fill, call = call)

  term <- x_axis_label(plot)

  if (is.null(obs_stat)) {
    return(plot)
  }

  res <- list()

  # Add shading
  if (!is.null(direction) && !is.null(fill)) {
    if (direction %in% c("less", "left", "greater", "right")) {
      tail_area <- one_tail_area(obs_stat, direction)

      res <- c(res, do.call(geom_tail_area, c(list(tail_area, fill), dots)))
    } else if (
      direction %in%
        c("two_sided", "both", "two-sided", "two sided", "two.sided")
    ) {
      tail_area <- two_tail_area(obs_stat, direction)

      res <- c(res, do.call(geom_tail_area, c(list(tail_area, fill), dots)))
    } else {
      cli_warn(
        '`direction` should be one of `"less"`, `"left"`, `"greater"`, \\
         `"right"`, `"two-sided"`, `"both"`, `"two_sided"`, `"two sided"`, \\
         or `"two.sided"`.'
      )
    }
  }

  # Add vertical line at `obs_stat`
  # Making extra step of precomputing arguments in order to have default value
  # of `linewidth = 2` overwritable in `...`
  segment_args <- c_dedupl(
    # Not overwritable arguments
    list(
      # Address length-1 aesthetics warning by providing geom-specific data (#528)
      data = data.frame(obs_stat = obs_stat),
      # Here `aes()` is needed to force {ggplot2} to include segment in the plot
      mapping = aes(x = obs_stat, xend = obs_stat, y = 0, yend = Inf),
      color = color,
      inherit.aes = FALSE
    ),
    # Extra arguments
    dots,
    # Default arguments that might be replaced in `...`
    list(linewidth = 2)
  )
  segment_layer <- do.call(ggplot2::geom_segment, segment_args)

  res <- c(res, list(segment_layer))

  plot + res
}


check_shade_p_value_args <- function(
  obs_stat,
  direction,
  color,
  fill,
  call = caller_env()
) {
  if (!is.null(obs_stat)) {
    check_type(obs_stat, is.numeric, call = call)
  }
  if (!is.null(direction)) {
    check_type(direction, is.character, call = call)
  }
  check_type(color, is_color_string, "color string", call = call)
  check_type(fill, is_color_string, "color string", call = call)

  TRUE
}

geom_tail_area <- function(tail_data, fill, ...) {
  area_args <- c_dedupl(
    list(
      data = tail_data,
      mapping = aes(x = x, y = y, group = dir),
      fill = fill,
      show.legend = FALSE,
      inherit.aes = FALSE
    ),
    list(...),
    list(alpha = 0.6)
  )
  area_layer <- do.call(ggplot2::geom_area, area_args)

  list(area_layer)
}

two_tail_area <- function(obs_stat, direction) {
  # Take advantage of {ggplot2} functionality to accept function as `data`.
  # This is needed to make possible existence of `shade_p_value()` in case of
  # `direction = "both"`, as it depends on actual `data` but adding it as
  # argument to `shade_p_value()` is very bad.
  # Also needed to warn about incorrect usage of right tail tests.
  function(data) {
    warn_right_tail_test(direction, short_theory_type(data))

    if (get_viz_method(data) == "theoretical") {
      second_border <- -obs_stat
    } else {
      second_border <- mirror_obs_stat(data$stat, obs_stat)
    }

    left_area <- one_tail_area(
      min(obs_stat, second_border),
      "left",
      do_warn = FALSE
    )(data)
    right_area <- one_tail_area(
      max(obs_stat, second_border),
      "right",
      do_warn = FALSE
    )(data)

    ret <- dplyr::bind_rows(left_area, right_area)

    # jitter one of the x coords that the right and left area have in common
    # so that their heights aren't summed
    common_x <- which.max(ret$x[ret$dir == "left"])

    ret$x[common_x] <- ret$x[common_x] - 1e-5 * ret$x[common_x]

    ret
  }
}

one_tail_area <- function(obs_stat, direction, do_warn = TRUE) {
  # Take advantage of {ggplot2} functionality to accept function as `data`.
  function(data) {
    warn_right_tail_test(direction, short_theory_type(data), do_warn)

    norm_dir <- norm_direction(direction)
    viz_method <- get_viz_method(data)

    # Compute grid points for upper bound of shading area
    switch(
      viz_method,
      theoretical = theor_area(data, obs_stat, norm_dir),
      simulation = hist_area(data, obs_stat, norm_dir, yval = "ymax"),
      both = hist_area(data, obs_stat, norm_dir, yval = "density")
    )
  }
}

theor_area <- function(data, obs_stat, direction, n_grid = 1001) {
  plot_data <- create_plot_data(data)

  g <- ggplot(plot_data) + theoretical_layer(data, "black", do_warn = FALSE)
  g_data <- ggplot2::ggplot_build(g)[["data"]][[1]]

  curve_fun <- stats::approxfun(
    x = g_data[["x"]],
    y = g_data[["y"]],
    yleft = 0,
    yright = 0
  )

  # Compute "x" grid of curve, area under which will be shaded.
  x_grid <- switch(
    # `direction` can be one of "left" or "right" at this point of execution
    direction,
    left = seq(from = min(g_data[["x"]]), to = obs_stat, length.out = n_grid),
    right = seq(from = obs_stat, to = max(g_data[["x"]]), length.out = n_grid)
  )

  tibble::tibble(x = x_grid, y = curve_fun(x_grid), dir = direction)
}

hist_area <- function(data, obs_stat, direction, yval) {
  g <- ggplot(data) + simulation_layer(data)
  g_data <- ggplot2::ggplot_build(g)[["data"]][[1]]

  # Compute knots for step function representing histogram bars and space
  # between them.
  # "x" coordinates are computed from `x_left` and `x_right`: "x" coordinates
  # of "shrinked" (to avoid duplicte points later) histogram bars.
  x_left <- (1 - 1e-5) * g_data[["xmin"]] + 1e-5 * g_data[["xmax"]]
  x_right <- 1e-5 * g_data[["xmin"]] + (1 - 1e-5) * g_data[["xmax"]]
  # `x` is created as `c(x_left[1], x_right[1], x_left[2], ...)`
  x <- c(t(cbind(x_left, x_right)))

  # "y" coordinates represent values of future `stepfun(..., right = FALSE)`
  # outputs between `x` knots. That is:
  # y[1] is value inside [-Inf, x_left[1]) (zero),
  # y[2] - value inside [x_left[1], x_right[1]) (height of first histogram bar),
  # y[3] - value inside [x_right[1], x_left[2]) (zero), and so on.
  y <- c(0, t(cbind(g_data[[yval]], 0)))

  # Output step function should evaluate to histogram bar heights on both
  # corresponding ends, i.e. `curve_fun(c(x_left[1], x_right[1]))` should return
  # vector of length two with heights of first histogram bar. `stepfun()` treats
  # input `x` as consequtive semi-open intervals. To achieve effect of closed
  # intervals, `pmax()` trick is used.
  curve_fun <- function(t) {
    pmax(
      stats::stepfun(x, y, right = FALSE)(t),
      stats::stepfun(x, y, right = TRUE)(t)
    )
  }

  # "True" left and right "x" coordinates of histogram bars are added to achieve
  # "almost vertical" lines with `geom_area()` usage. If don't do this, then
  # area might be shaded under line segments connecting edges of consequtive
  # histogram bars.
  x_extra <- switch(
    direction,
    left = g_data[["xmax"]],
    right = g_data[["xmin"]]
  )
  x_extra <- sort(c(x, x_extra))
  x_grid <- switch(
    # `direction` can be one of "left" or "right" at this point of execution
    direction,
    left = c(x_extra[x_extra < obs_stat], obs_stat),
    right = c(obs_stat, x_extra[x_extra > obs_stat])
  )

  # if area will have area 0, return 0-length tibble to trigger
  # `ggplot:::empty()` edge case (#528)
  if (length(x_grid) == 1) {
    return(tibble::tibble(x = numeric(0), y = numeric(0), dir = character(0)))
  }

  tibble::tibble(x = x_grid, y = curve_fun(x_grid), dir = direction)
}

norm_direction <- function(direction) {
  switch(
    direction,
    less = ,
    left = "left",
    greater = ,
    right = "right",
    two_sided = ,
    `two-sided` = ,
    `two sided` = ,
    `two.sided` = ,
    both = "both"
  )
}

warn_right_tail_test <- function(direction, stat_name, do_warn = TRUE) {
  if (
    do_warn &&
      !is.null(direction) &&
      !(direction %in% c("greater", "right")) &&
      (stat_name %in% c("F", "Chi-Square"))
  ) {
    cli_warn(
      "{stat_name} usually corresponds to right-tailed tests. \\
       Proceed with caution."
    )
  }

  TRUE
}

mirror_obs_stat <- function(vector, observation) {
  obs_percentile <- stats::ecdf(vector)(observation)

  stats::quantile(vector, probs = 1 - obs_percentile)
}


================================================
FILE: R/specify.R
================================================
#' Specify response and explanatory variables
#'
#' @description
#'
#' `specify()` is used to specify which columns in the supplied data frame are
#' the relevant response (and, if applicable, explanatory) variables. Note that
#' character variables are converted to `factor`s.
#'
#' Learn more in `vignette("infer")`.
#'
#' @param x A data frame that can be coerced into a [tibble][tibble::tibble].
#' @param formula A formula with the response variable on the left and the
#'   explanatory on the right. Alternatively, a `response` and `explanatory`
#'   argument can be supplied.
#' @param response The variable name in `x` that will serve as the response.
#'   This is an alternative to using the `formula` argument.
#' @param explanatory The variable name in `x` that will serve as the
#'   explanatory variable. This is an alternative to using the formula argument.
#' @param success The level of `response` that will be considered a success, as
#'   a string. Needed for inference on one proportion, a difference in
#'   proportions, and corresponding z stats.
#'
#' @return A tibble containing the response (and explanatory, if specified)
#'   variable data.
#'
#' @examples
#' # specifying for a point estimate on one variable
#' gss |>
#'    specify(response = age)
#'
#' # specify a relationship between variables as a formula...
#' gss |>
#'   specify(age ~ partyid)
#'
#' # ...or with named arguments!
#' gss |>
#'   specify(response = age, explanatory = partyid)
#'
#' # more in-depth explanation of how to use the infer package
#' \dontrun{
#' vignette("infer")
#' }
#'
#' @importFrom rlang f_lhs f_rhs get_expr caller_env
#' @importFrom dplyr select any_of across
#' @importFrom methods hasArg
#' @family core functions
#' @export
specify <- function(
  x,
  formula,
  response = NULL,
  explanatory = NULL,
  success = NULL
) {
  check_type(x, is.data.frame)

  # Standardize variable types
  x <- standardize_variable_types(x)

  # Parse response and explanatory variables
  response <- enquo(response)
  explanatory <- enquo(explanatory)

  x <- parse_variables(x, formula, response, explanatory)

  # Add attributes
  attr(x, "success") <- success
  attr(x, "generated") <- FALSE
  attr(x, "hypothesized") <- FALSE
  attr(x, "fitted") <- FALSE

  # Check the success argument
  check_success_arg(x, success)

  # Select variables
  x <- x |>
    select(any_of(c(response_name(x), explanatory_name(x))))

  is_complete <- stats::complete.cases(x)
  if (!all(is_complete)) {
    x <- dplyr::filter(x, is_complete)
    cli_warn("Removed {sum(!is_complete)} rows containing missing values.")
  }

  # Add "infer" class
  append_infer_class(x)
}

parse_variables <- function(
  x,
  formula,
  response,
  explanatory,
  call = caller_env()
) {
  if (methods::hasArg(formula)) {
    tryCatch(
      rlang::is_formula(formula),
      error = function(e) {
        cli_abort(
          c(
            "The argument you passed in for the formula does not exist.",
            i = "Were you trying to pass in an unquoted column name?",
            i = "Did you forget to name one or more arguments?"
          ),
          call = call
        )
      }
    )
    if (!rlang::is_formula(formula)) {
      cli_abort(
        c(
          "The first unnamed argument must be a formula.",
          i = "You passed in '{get_type(formula)}'.",
          x = "Did you forget to name one or more arguments?"
        ),
        call = call
      )
    }
  }

  attr(x, "response") <- get_expr(response)
  attr(x, "explanatory") <- get_expr(explanatory)
  attr(x, "formula") <- NULL

  if (methods::hasArg(formula)) {
    attr(x, "response") <- f_lhs(formula)
    attr(x, "explanatory") <- f_rhs(formula)
    attr(x, "formula") <- formula
  }

  # Check response and explanatory variables to be appropriate for later use
  if (!has_response(x)) {
    cli_abort(
      "Please supply a response variable that is not `NULL`.",
      call = call
    )
  }

  check_var_correct(x, "response", call = call)
  check_var_correct(x, "explanatory", call = call)

  # If there's an explanatory var
  check_vars_different(x, call = call)

  if (!has_attr(x, "response")) {
    attr(x, "response_type") <- NULL
  } else {
    attr(x, "response_type") <- class(response_variable(x))
  }

  if (!has_attr(x, "explanatory")) {
    attr(x, "explanatory_type") <- NULL
  } else {
    attr(x, "explanatory_type") <-
      purrr::map_chr(as.data.frame(explanatory_variable(x)), class)
  }

  attr(x, "type_desc_response") <- determine_variable_type(x, "response")
  attr(x, "type_desc_explanatory") <- determine_variable_type(x, "explanatory")

  # Determine params for theoretical fit
  x <- set_params(x)

  x
}

check_success_arg <- function(x, success, call = caller_env()) {
  response_col <- response_variable(x)

  if (!is.null(success)) {
    if (!is.character(success)) {
      cli_abort("`success` must be a string.", call = call)
    }
    if (!is.factor(response_col)) {
      cli_abort(
        "`success` should only be specified if the response is a categorical \\
         variable.",
        call = call
      )
    }
    if (!(success %in% levels(response_col))) {
      cli_abort(
        '{success} is not a valid level of {response_name(x)}.',
        call = call
      )
    }
    if (sum(table(response_col) > 0) > 2) {
      cli_abort(
        "`success` can only be used if the response has two levels. \\
         `filter()` can reduce a variable to two levels.",
        call = call
      )
    }
  }

  if (
    (attr(x, "response_type") == "factor" &&
      is.null(success) &&
      length(levels(response_variable(x))) == 2) &&
      ((!has_attr(x, "explanatory_type") ||
        length(levels(explanatory_variable(x))) == 2))
  ) {
    cli_abort(
      'A level of the response variable `{response_name(x)}` needs to be \\
        specified for the `success` argument in `specify()`.',
      call = call
    )
  }
}

check_var_correct <- function(x, var_name, call = caller_env()) {
  var <- attr(x, var_name)

  # Variable (if present) should be a symbolic column name
  if (!is.null(var)) {
    if (!rlang::is_symbolic(var)) {
      cli_abort(
        "The {var_name} should be a bare variable name (not a string in \\
         quotation marks).",
        call = call
      )
    }

    if (any(!(all.vars(var) %in% names(x)))) {
      cli_abort(
        'The {var_name} variable `{var}` cannot be found in this dataframe.',
        call = call
      )
    }
  }

  TRUE
}

check_vars_different <- function(x, call = caller_env()) {
  if (has_response(x) && has_explanatory(x)) {
    if (identical(response_name(x), explanatory_name(x))) {
      cli_abort(
        "The response and explanatory variables must be different from one \\
         another.",
        call = call
      )
    }
  }

  TRUE
}


================================================
FILE: R/utils.R
================================================
# Miscellaneous Helpers -----------------------------------------------
suppress_infer_messages <- function() {
  identical(Sys.getenv("SUPPRESS_INFER_MESSAGES"), "true")
}

append_infer_class <- function(x) {
  x_cl <- class(x)
  if (x_cl[1] != "infer") {
    class(x) <- c("infer", x_cl)
  }

  x
}

format_params <- function(x) {
  par_levels <- get_par_levels(x)
  fct_levels <- as.character(unique(response_variable(x)))
  attr(x, "params")[match(fct_levels, par_levels)]
}

print_params <- function(x) {
  params <- attr(x, "params")

  switch(
    as.character(length(params)),
    "1" = glue(": `{names(params)} = {unname(params)}`", .null = "NULL"),
    "2" = glue(": `p = .5`", .null = "NULL"),
    glue("s: `p = c({put_params(x, params)})`", .null = "NULL")
  )
}

put_params <- function(x, params) {
  paste0(get_par_levels(x), " = ", params, collapse = ", ")
}

get_par_levels <- function(x) {
  par_names <- names(attr(x, "params"))
  gsub("^.\\.", "", par_names)
}

copy_attrs <- function(
  to,
  from,
  attrs = c(
    "response",
    "success",
    "explanatory",
    "response_type",
    "explanatory_type",
    "distr_param",
    "distr_param2",
    "null",
    "params",
    "theory_type",
    "generated",
    "type",
    "hypothesized",
    "formula",
    "fitted",
    "type_desc_response",
    "type_desc_explanatory"
  )
) {
  for (at in attrs) {
    attr(to, at) <- attr(from, at)
  }

  to
}

# Wrapper for deduplication by name after doing `c(...)`
c_dedupl <- function(...) {
  l <- c(...)

  l_names <- names(l)

  if (is.null(l_names)) {
    l
  } else {
    l[!duplicated(l_names) | (l_names == "")]
  }
}

reorder_explanatory <- function(x, order) {
  x[[explanatory_name(x)]] <- factor(
    explanatory_variable(x),
    levels = c(order[1], order[2])
  )
  x
}

standardize_variable_types <- function(x) {
  tibble::as_tibble(x) |>
    # character and ordered to factor
    dplyr::mutate(
      dplyr::across(
        where(~ is.character(.x) || is.ordered(.x)),
        ~ factor(.x, ordered = FALSE)
      )
    ) |>
    # logical to factor, with TRUE as the first level
    dplyr::mutate(
      dplyr::across(
        where(~ is.logical(.x)),
        ~ factor(.x, levels = c("TRUE", "FALSE"))
      )
    ) |>
    # integer to numeric
    dplyr::mutate(
      dplyr::across(
        where(is.integer),
        as.numeric
      )
    )
}

# Performant grouping ----------------------------------------------------------
group_by_replicate <- function(tbl, reps, n) {
  dplyr::new_grouped_df(
    tbl,
    groups = make_replicate_groups(tbl, reps = reps, n = n)
  )
}


make_replicate_groups <- function(tbl, reps, n) {
  res <-
    tibble::new_tibble(list(
      replicate = 1:reps,
      .rows = vctrs::as_list_of(
        vctrs::vec_chop(seq_len(n * reps), sizes = rep(n, reps)),
        .ptype = integer()
      )
    ))

  attr(res, ".drop") <- TRUE

  res
}

# Getters, setters, and indicators ------------------------------------------
explanatory_expr <- function(x) {
  attr(x, "explanatory")
}

explanatory_name <- function(x) {
  all.vars(explanatory_expr(x))
}

# if there is more than one explanatory variable, return a data frame.
# if there's one, return a vector. otherwise, return NULL.
explanatory_variable <- function(x) {
  if (!is.null(explanatory_expr(x))) {
    if (length(explanatory_name(x)) > 1) {
      x[explanatory_name(x)]
    } else {
      x[[explanatory_name(x)]]
    }
  } else {
    NULL
  }
}

response_expr <- function(x) {
  attr(x, "response")
}

response_name <- function(x) {
  as.character(response_expr(x))
}

response_variable <- function(x) {
  x[[response_name(x)]]
}

theory_type <- function(x) {
  attr(x, "theory_type")
}

get_response_levels <- function(x) {
  as.character(unique(response_variable(x)))
}

get_success_then_response_levels <- function(x) {
  success_attr <- attr(x, "success")
  response_levels <- setdiff(
    get_response_levels(x),
    success_attr
  )
  c(success_attr, response_levels)
}

is_generated <- function(x) {
  isTRUE(attr(x, "generated"))
}

is_hypothesized <- function(x) {
  isTRUE(attr(x, "hypothesized"))
}

is_fitted <- function(x) {
  isTRUE(attr(x, "fitted"))
}

is_mlr <- function(x) {
  length(explanatory_name(x)) > 1
}

has_attr <- function(x, at) {
  !is.null(attr(x, at, exact = TRUE))
}

has_explanatory <- function(x) {
  has_attr(x, "explanatory")
}

has_response <- function(x) {
  has_attr(x, "response")
}

is_color_string <- function(x) {
  rlang::is_string(x) &&
    tryCatch(is.matrix(grDevices::col2rgb(x)), error = function(e) {
      FALSE
    })
}

is_single_number <- function(
  x,
  min_val = -Inf,
  max_val = Inf,
  include_min_val = TRUE,
  include_max_val = TRUE
) {
  left_compare <- if (include_min_val) {
    `>=`
  } else {
    `>`
  }
  right_compare <- if (include_max_val) {
    `<=`
  } else {
    `<`
  }

  is.numeric(x) &&
    (length(x) == 1) &&
    is.finite(x) &&
    left_compare(x, min_val) &&
    right_compare(x, max_val)
}

is_truefalse <- function(x) {
  identical(x, TRUE) || identical(x, FALSE)
}

# Helpers for test statistics --------------------------------------

# Simplify and standardize checks by grouping statistics based on variable types
# num = numeric, bin = binary (dichotomous), mult = multinomial
stat_types <- tibble::tribble(
  ~resp,
  ~exp,
  ~stats,
  "num",
  "",
  c("mean", "median", "sum", "sd", "t"),
  "num",
  "num",
  c("slope", "correlation"),
  "num",
  "bin",
  c("diff in means", "diff in medians", "t", "ratio of means"),
  "num",
  "mult",
  c("F"),
  "bin",
  "",
  c("prop", "count", "z"),
  "bin",
  "bin",
  c("diff in props", "z", "ratio of props", "odds ratio", "Chisq"),
  "bin",
  "mult",
  c("Chisq"),
  "mult",
  "bin",
  c("Chisq"),
  "mult",
  "",
  c("Chisq"),
  "mult",
  "mult",
  c("Chisq"),
)

stat_type_desc <- tibble::tribble(
  ~type,
  ~description,
  "num",
  "numeric",
  "bin",
  "dichotomous categorical",
  "mult",
  "multinomial categorical"
)

get_stat_type_desc <- function(stat_type) {
  stat_type_desc$description[stat_type_desc$type == stat_type]
}

stat_desc <- tibble::tribble(
  ~stat,
  ~description,
  "mean",
  "A mean",
  "median",
  "A median",
  "sum",
  "A sum",
  "sd",
  "A standard deviation",
  "prop",
  "A proportion",
  "count",
  "A count",
  "diff in means",
  "A difference in means",
  "diff in medians",
  "A difference in medians",
  "diff in props",
  "A difference in proportions",
  "Chisq",
  "A chi-square statistic",
  "F",
  "An F statistic",
  "slope",
  "A slope",
  "correlation",
  "A correlation",
  "t",
  "A t statistic",
  "z",
  "A z statistic",
  "ratio of props",
  "A ratio of proportions",
  "ratio of means",
  "A ratio of means",
  "odds ratio",
  "An odds ratio"
)

stat_hypotheses <- tibble::tribble(
  ~stat,
  ~hypothesis,
  "mean",
  "point",
  "median",
  "point",
  "sum",
  "point",
  "sd",
  "point",
  "prop",
  "point",
  "count",
  "point",
  "mean",
  "paired independence",
  "median",
  "paired independence",
  "sum",
  "paired independence",
  "sd",
  "paired independence",
  "diff in means",
  "independence",
  "diff in medians",
  "independence",
  "diff in props",
  "independence",
  "Chisq",
  "independence",
  "Chisq",
  "point",
  "F",
  "independence",
  "slope",
  "independence",
  "correlation",
  "independence",
  "t",
  "independence",
  "t",
  "point",
  "z",
  "independence",
  "z",
  "point",
  "ratio of props",
  "independence",
  "ratio of means",
  "independence",
  "odds ratio",
  "independence"
)

get_stat_desc <- function(stat) {
  stat_desc$description[stat_desc$stat == stat]
}

# Values of `stat` argument of `calculate()`
implemented_stats <- c(
  "mean",
  "median",
  "sum",
  "sd",
  "prop",
  "count",
  "diff in means",
  "diff in medians",
  "diff in props",
  "Chisq",
  "F",
  "slope",
  "correlation",
  "t",
  "z",
  "ratio of props",
  "ratio of means",
  "odds ratio"
)

implemented_stats_aliases <- tibble::tribble(
  ~alias,
  ~target,
  # Allow case insensitive stat names
  "f",
  "F",
  "chisq",
  "Chisq"
)

untheorized_stats <- implemented_stats[
  !implemented_stats %in%
    c(
      "Chisq",
      "F",
      "t",
      "z"
    )
]

# Given a statistic and theory type, assume a reasonable null
p_null <- function(x) {
  lvls <- levels(response_variable(x))
  num_lvls <- length(lvls)
  probs <- 1 / num_lvls

  setNames(rep(probs, num_lvls), paste0("p.", lvls))
}

# The "null_fn" column is a function(x) whose output gives attr(x, "params")
theorized_nulls <- tibble::tribble(
  ~stat,
  ~null_fn,
  "Chisq",
  p_null,
  "t",
  function(x) {
    setNames(0, "mu")
  },
  "z",
  p_null
)

determine_variable_type <- function(x, variable) {
  var <- switch(
    variable,
    response = response_variable(x),
    explanatory = explanatory_variable(x)
  )

  if (is.null(var)) {
    ""
  } else if (inherits(var, "numeric")) {
    "num"
  } else if (length(unique(var)) == 2) {
    "bin"
  } else {
    "mult"
  }
}

# Argument checking --------------------------------------------------------

check_order <- function(
  x,
  order,
  in_calculate = TRUE,
  stat,
  call = caller_env()
) {
  # If there doesn't need to be an order argument, warn if there is one,
  # and otherwise, skip checks
  if (
    !(theory_type(x) %in%
      c("Two sample props z", "Two sample t") ||
      is.null(stat) ||
      stat %in%
        c(
          "diff in means",
          "diff in medians",
          "diff in props",
          "ratio of props",
          "odds ratio"
        ))
  ) {
    if (!is.null(order)) {
      cli_warn(
        "Statistic is not based on a difference or ratio; the `order` argument \\
          will be ignored. Check {.help [{.fun calculate}](infer::calculate)} \\
          for details."
      )
    } else {
      return(order)
    }
  }

  explanatory_variable <- explanatory_variable(x)
  unique_ex <- sort(unique(explanatory_variable))

  if (is.null(order) & in_calculate) {
    # Default to subtracting/dividing the first (alphabetically) level by the
    # second, unless the explanatory variable is a factor (in which case order
    # is preserved); raise a warning if this was done implicitly.
    order <- as.character(unique_ex)
    cli_warn(
      "The statistic is based on a difference or ratio; by default, for \\
       difference-based statistics, the explanatory variable is subtracted \\
       in the order \"{unique_ex[1]}\" - \"{unique_ex[2]}\", or divided in \\
       the order \"{unique_ex[1]}\" / \"{unique_ex[2]}\" for ratio-based \\
       statistics. To specify this order yourself, supply `order = \\
       c(\"{unique_ex[1]}\", \"{unique_ex[2]}\")` to the calculate() function."
    )
  } else if (is.null(order)) {
    order <- as.character(unique_ex)
    cli_warn(
      "The statistic is based on a difference or ratio; by default, for \\
       difference-based statistics, the explanatory variable is subtracted \\
       in the order \"{unique_ex[1]}\" - \"{unique_ex[2]}\", or divided in \\
       the order \"{unique_ex[1]}\" / \"{unique_ex[2]}\" for ratio-based \\
       statistics. To specify this order yourself, supply `order = \\
       c(\"{unique_ex[1]}\", \"{unique_ex[2]}\")`."
    )
  } else {
    if (xor(is.na(order[1]), is.na(order[2]))) {
      cli_abort(
        "Only one level specified in `order`. Both levels need to be specified.",
        call = call
      )
    }
    if (length(order) > 2) {
      cli_abort(
        "`order` is expecting only two entries.",
        call = call
      )
    }
    if (order[1] %in% unique_ex == FALSE) {
      cli_abort(
        "{order[1]} is not a level of the explanatory variable.",
        call = call
      )
    }
    if (order[2] %in% unique_ex == FALSE) {
      cli_abort(
        "{order[2]} is not a level of the explanatory variable.",
        call = call
      )
    }
  }
  # return the order as given (unless the argument was invalid or NULL)
  order
}

check_point_params <- function(x, stat, call = caller_env()) {
  param_names <- attr(attr(x, "params"), "names")
  hyp_text <- 'to be set in `hypothesize()`.'
  if (
    is_hypothesized(x) && !identical(attr(x, "null"), "paired independence")
  ) {
    if (stat %in% c("mean", "median", "sd", "prop")) {
      if ((stat == "mean") && !("mu" %in% param_names)) {
        cli_abort('`stat == "mean"` requires `"mu"` {hyp_text}', call = call)
      }
      if (!(stat == "mean") && ("mu" %in% param_names)) {
        cli_abort(
          '`"mu"` does not correspond to `stat = "{stat}"`.',
          call = call
        )
      }
      if ((stat == "median") && !("med" %in% param_names)) {
        cli_abort('`stat == "median"` requires `"med"` {hyp_text}', call = call)
      }
      if (!(stat == "median") && ("med" %in% param_names)) {
        cli_abort(
          '`"med"` does not correspond to `stat = "{stat}"`.',
          call = call
        )
      }
    }
  }
}

# This function checks for NaNs in the output of `calculate` and raises
# a message/warning/error depending on the context in which it was called.
check_for_nan <- function(x, context) {
  if (inherits(x, "infer_dist")) {
    return(x)
  }

  stat_is_nan <- is.nan(x[["stat"]])
  num_nans <- sum(stat_is_nan)
  # If there are no NaNs, continue on as normal :-)
  if (num_nans == 0) {
    return(x)
  }

  calc_ref <- c(
    i = "See {.help [{.fun calculate}](infer::calculate)} for more details."
  )
  # If all of the data is NaN, raise an error
  if (num_nans == nrow(x)) {
    cli_abort(
      c("All calculated statistics were `NaN`.", calc_ref),
      call = NULL
    )
  }

  stats_were <- if (num_nans == 1) {
    "statistic was"
  } else {
    "statistics were"
  }
  num_nans_msg <- glue::glue("{num_nans} calculated {stats_were} `NaN`")

  if (context == "visualize") {
    # Raise a warning and plot the data with NaNs removed
    cli_warn(
      c(
        "{num_nans_msg}. `NaN`s have been omitted from visualization.",
        calc_ref
      )
    )
    return(x[!stat_is_nan, ])
  } else if (context == "get_p_value") {
    # Raise an error
    cli_abort(
      c(
        "{num_nans_msg}. Simulation-based p-values are not well-defined for \\
       null distributions with non-finite values.",
        calc_ref
      ),
      call = NULL
    )
  }
}

check_direction <- function(
  direction = c(
    "less",
    "greater",
    "two_sided",
    "left",
    "right",
    "both",
    "two-sided",
    "two sided",
    "two.sided"
  ),
  call = caller_env()
) {
  check_type(direction, is.character, call = call)

  if (
    !(direction %in%
      c(
        "less",
        "greater",
        "two_sided",
        "left",
        "right",
        "both",
        "two-sided",
        "two sided",
        "two.sided"
      ))
  ) {
    cli_abort(
      'The provided value for `direction` is not appropriate. Possible values \\
      are "less", "greater", "two-sided", "left", "right", "both", \
      "two_sided", "two sided", or "two.sided".',
      call = call
    )
  }
}

check_obs_stat <- function(obs_stat, plot = NULL, call = caller_env()) {
  if (!is.null(obs_stat)) {
    if ("data.frame" %in% class(obs_stat)) {
      if (is_fitted(obs_stat)) {
        x_lab <- x_axis_label(plot)

        obs_stat <-
          obs_stat |>
          dplyr::filter(term == x_lab) |>
          dplyr::pull(estimate)

        return(obs_stat)
      }

      check_type(obs_stat, is.data.frame, call = call)
      if ((nrow(obs_stat) != 1) || (ncol(obs_stat) != 1)) {
        cli_warn(
          "The first row and first column value of the given `obs_stat` will \\
           be used."
        )
      }

      # [[1]] is used in case `stat` is not specified as name of 1x1
      obs_stat <- obs_stat[[1]][[1]]
      check_type(obs_stat, is.numeric, call = call)
    } else {
      check_type(obs_stat, is.numeric, call = call)
    }
  }

  obs_stat
}

check_mlr_x_and_obs_stat <- function(
  x,
  obs_stat,
  fn,
  arg,
  call = caller_env()
) {
  if (!is_fitted(obs_stat)) {
    cli_abort(
      c(
        "The `{arg}` argument should be the output of `fit()`.",
        i = "See the documentation with `?{fn}`."
      ),
      call = call
    )
  }

  if (!is_generated(x)) {
    cli_abort(
      "The `x` argument needs to be passed to `generate()` before `fit()`.",
      call = call
    )
  }

  if (
    any(!unique(x$term) %in% unique(obs_stat$term)) ||
      any(!unique(obs_stat$term) %in% unique(x$term))
  ) {
    cli_abort(
      "The explanatory variables used to generate the distribution of \\
       null fits are not the same used to fit the observed data.",
      call = call
    )
  }

  if (response_name(x) != response_name(obs_stat)) {
    cli_abort(
      "The response variable of the null fits ({response_name(x)}) is not \\
       the same as that of the observed fit ({response_name(obs_stat)}).",
      call = call
    )
  }

  invisible(TRUE)
}

#' Check object type
#'
#' Throw an error in case object is not of desired type.
#'
#' @param x An object to check.
#' @param predicate A function to perform check or a formula (as input for
#'   `rlang::as_function()`). A good idea is to use function named `is.*()` or
#'   `is_*()` with possible `<package>::` prefix.
#' @param type_name A string for desired type name. If `NULL`, type is taken
#'   from parsing original name of supplied `predicate`: all alphanumeric with
#'   '_' and '.' characters (until the name end) after the first appearance of
#'   either `is.` or `is_`. In case of a doubt supply `type_name` explicitly.
#' @param x_name String to be used as variable name instead of supplied one
#'   (default).
#' @param allow_null If `TRUE` then error isn't thrown if `x` is `NULL`, no
#'   matter what `predicate(x)` returns.
#' @param ... Arguments to be passed to `predicate`.
#'
#' @examples
#' \donttest{
#' x <- 1
#' check_type(x, is.numeric)
#' check_type(x, is.logical)
#' check_type(x, rlang::is_string, "character of length 1")
#' check_type(
#'   x,
#'   ~ is.character(.) && (length(.) == 1),
#'   "character of length 1"
#' )
#' }
#'
#' @keywords internal
#' @noRd
check_type <- function(
  x,
  predicate,
  type_name = NULL,
  x_name = NULL,
  allow_null = FALSE,
  ...,
  call = caller_env()
) {
  if (is.null(x_name)) {
    x_name <- deparse(substitute(x))
  }

  if (is.null(type_name)) {
    predicate_name <- deparse(rlang::enexpr(predicate))
    type_name <- parse_type(predicate_name)
  }

  predicate <- rlang::as_function(predicate)

  is_pred_true <- (allow_null && is.null(x)) || isTRUE(predicate(x, ...))

  if (!is_pred_true) {
    # Not using "must be of type" because of 'tibble' and 'string' cases
    cli_abort(
      "`{x_name}` must be '{type_name}', not '{get_type(x)}'.",
      call = call
    )
  }

  x
}

# This function is needed because `typeof()` on data frame returns "list"
get_type <- function(x) {
  if (is.data.frame(x)) {
    return("data.frame")
  }

  typeof(x)
}

parse_type <- function(f_name) {
  res <- regmatches(
    f_name,
    regexec("is[_\\.]([[:alnum:]_\\.]+)$", f_name)
  )[[1]][2]

  if (is.na(res)) {
    res <- f_name
  }

  res
}

check_is_distribution <- function(x, fn, call = caller_env()) {
  if (!any(inherits(x, "infer_dist") || is.data.frame(x))) {
    cli_abort(
      "The `x` argument to `{fn}()` must be an infer distribution, \\
       outputted by `assume()` or `calculate()`.",
      call = call
    )
  }
}


================================================
FILE: R/visualize.R
================================================
#' @importFrom ggplot2 ggplot_add
#' @export
ggplot2::ggplot_add

#' Visualize statistical inference
#'
#' @description
#'
#' Visualize the distribution of the simulation-based inferential statistics or
#' the theoretical distribution (or both!).
#'
#' Learn more in `vignette("infer")`.
#'
#' @param data A distribution. For simulation-based inference, a data frame
#'   containing a distribution of [calculate()]d statistics
#'   or [`fit()`][fit.infer()]ted coefficient estimates. This object should
#'   have been passed to [generate()] before being supplied or
#'   [calculate()] to [`fit()`][fit.infer()]. For theory-based inference,
#'   the output of [assume()].
#' @param bins The number of bins in the histogram.
#' @param method A string giving the method to display. Options are
#'   `"simulation"`, `"theoretical"`, or `"both"` with `"both"` corresponding to
#'   `"simulation"` and `"theoretical"`. If `data` is the output of [assume()],
#'   this argument will be ignored and default to `"theoretical"`.
#' @param dens_color A character or hex string specifying the color of the
#'   theoretical density curve.
#' @param ... Additional arguments passed along to functions in ggplot2.
#'   For `method = "simulation"`, `stat_bin()`, and for `method = "theoretical"`,
#'   `geom_path()`. Some values may be overwritten by infer internally.
#'
#' @details In order to make the visualization workflow more straightforward
#' and explicit, `visualize()` now only should be used to plot distributions
#' of statistics directly. A number of arguments related to shading p-values and
#' confidence intervals are now deprecated in `visualize()` and should
#' now be passed to [shade_p_value()] and [shade_confidence_interval()],
#' respectively. [visualize()] will raise a warning if deprecated arguments
#' are supplied.
#'
#' @return
#'
#' For [calculate()]-based workflows, a ggplot showing the simulation-based
#' distribution as a histogram or bar graph. Can also be used to display
#' theoretical distributions.
#'
#' For [assume()]-based workflows, a ggplot showing the theoretical distribution.
#'
#' For [`fit()`][fit.infer()]-based workflows, a `patchwork` object
#' showing the simulation-based distributions as a histogram or bar graph.
#' The interface to adjust plot options and themes is a bit different
#' for `patchwork` plots than ggplot2 plots. The examples highlight the
#' biggest differences here, but see [patchwork::plot_annotation()] and
#' [patchwork::&.gg] for more details.
#'
#' @seealso [shade_p_value()], [shade_confidence_interval()].
#'
#' @examples
#'
#' # generate a null distribution
#' null_dist <- gss |>
#'   # we're interested in the number of hours worked per week
#'   specify(response = hours) |>
#'   # hypothesizing that the mean is 40
#'   hypothesize(null = "point", mu = 40) |>
#'   # generating data points for a null distribution
#'   generate(reps = 1000, type = "bootstrap") |>
#'   # calculating a distribution of means
#'   calculate(stat = "mean")
#'
#' # or a bootstrap distribution, omitting the hypothesize() step,
#' # for use in confidence intervals
#' boot_dist <- gss |>
#'   specify(response = hours) |>
#'   generate(reps = 1000, type = "bootstrap") |>
#'   calculate(stat = "mean")
#'
#' # we can easily plot the null distribution by piping into visualize
#' null_dist |>
#'   visualize()
#'
#' # we can add layers to the plot as in ggplot, as well...
#' # find the point estimate---mean number of hours worked per week
#' point_estimate <- gss |>
#'   specify(response = hours) |>
#'   calculate(stat = "mean")
#'
#' # find a confidence interval around the point estimate
#' ci <- boot_dist |>
#'   get_confidence_interval(point_estimate = point_estimate,
#'                           # at the 95% confidence level
#'                           level = .95,
#'                           # using the standard error method
#'                           type = "se")
#'
#' # display a shading of the area beyond the p-value on the plot
#' null_dist |>
#'   visualize() +
#'   shade_p_value(obs_stat = point_estimate, direction = "two-sided")
#'
#' # ...or within the bounds of the confidence interval
#' null_dist |>
#'   visualize() +
#'   shade_confidence_interval(ci)
#'
#' # plot a theoretical sampling distribution by creating
#' # a theory-based distribution with `assume()`
#' sampling_dist <- gss |>
#'   specify(response = hours) |>
#'   assume(distribution = "t")
#'
#' visualize(sampling_dist)
#'
#' # you can shade confidence intervals on top of
#' # theoretical distributions, too---the theoretical
#' # distribution will be recentered and rescaled to
#' # align with the confidence interval
#' visualize(sampling_dist) +
#'   shade_confidence_interval(ci)
#'
#'
#' # to plot both a theory-based and simulation-based null distribution,
#' # use a theorized statistic (i.e. one of t, z, F, or Chisq)
#' # and supply the simulation-based null distribution
#' null_dist_t <- gss |>
#'   specify(response = hours) |>
#'   hypothesize(null = "point", mu = 40) |>
#'   generate(reps = 1000, type = "bootstrap") |>
#'   calculate(stat = "t")
#'
#' obs_stat <- gss |>
#'   specify(response = hours) |>
#'   hypothesize(null = "point", mu = 40) |>
#'   calculate(stat = "t")
#'
#' visualize(null_dist_t, method = "both")
#'
#' visualize(null_dist_t, method = "both") +
#'   shade_p_value(obs_stat, "both")
#'
#' \donttest{
#' # to visualize distributions of coefficients for multiple
#' # explanatory variables, use a `fit()`-based workflow
#'
#' # fit 1000 models with the `hours` variable permuted
#' null_fits <- gss |>
#'  specify(hours ~ age + college) |>
#'  hypothesize(null = "independence") |>
#'  generate(reps = 1000, type = "permute") |>
#'  fit()
#'
#' null_fits
#'
#' # visualize distributions of resulting coefficients
#' visualize(null_fits)
#'
#' # the interface to add themes and other elements to patchwork
#' # plots (outputted by `visualize` when the inputted data
#' # is from the `fit()` function) is a bit different than adding
#' # them to ggplot2 plots.
#' library(ggplot2)
#'
#' # to add a ggplot2 theme to a `calculate()`-based visualization, use `+`
#' null_dist |> visualize() + theme_dark()
#'
#' # to add a ggplot2 theme to a `fit()`-based visualization, use `&`
#' null_fits |> visualize() & theme_dark()
#' }
#'
#' # More in-depth explanation of how to use the infer package
#' \dontrun{
#' vignette("infer")
#' }
#'
#' @importFrom ggplot2 ggplot geom_histogram aes
#' @importFrom ggplot2 geom_vline geom_rect geom_bar labs
#' @importFrom stats dt qt df qf dnorm qnorm dchisq qchisq
#' @export
visualize <- function(
  data,
  bins = 15,
  method = "simulation",
  dens_color = "black",
  ...
) {
  if (inherits(data, "infer_dist")) {
    if (!missing(method) && method != "theoretical") {
      cli_warn(c(
        'Simulation-based visualization methods are not well-defined for \\
         `assume()` output; the `method` argument will be ignored.',
        i = 'Set `method = "theoretical"` to silence this message.'
      ))
    }

    method <- "theoretical"
    do_warn <- FALSE
  } else {
    if (method == "theoretical" && !suppress_infer_messages()) {
      cli_inform(
        'Rather than setting `method = "theoretical"` with a simulation-based \\
         null distribution, the preferred method for visualizing theory-based \\
         distributions with infer is now to pass the output of `assume()` as \\
         the first argument to `visualize()`.'
      )
    }

    do_warn <- TRUE
  }

  attr(data, "viz_method") <- method
  attr(data, "viz_bins") <- bins

  dots <- check_dots_for_deprecated(list(...))

  if (is_fitted(data)) {
    term_data <- data |>
      dplyr::rename(stat = estimate) |>
      dplyr::ungroup() |>
      dplyr::group_by(term) |>
      dplyr::group_split() |>
      purrr::map(copy_attrs, data) |>
      purrr::map(copy_attrs, data, c("viz_method", "viz_bins"))

    plots <- purrr::map2(
      term_data,
      purrr::map(term_data, purrr::pluck, "term", 1),
      visualize_term,
      bins = bins,
      method = method,
      dots = dots
    )

    return(
      patchwork::wrap_plots(plots, ncol = 1) +
        title_layer(
          term_data[[1]],
          title_fn = patchwork::plot_annotation
        )
    )
  } else {
    res <- visualize_term(
      data = data,
      term = "stat",
      bins = bins,
      method = method,
      dens_color = dens_color,
      dots = dots,
      do_warn = do_warn
    ) +
      title_layer(data)

    res
  }
}

#' @rdname visualize
#' @export
visualise <- visualize


visualize_term <- function(
  data,
  term,
  bins = 15,
  method = "simulation",
  dens_color = "black",
  dots,
  do_warn = TRUE,
  call = rlang::call2("visualize")
) {
  data <- check_for_nan(data, "visualize")
  check_visualize_args(data, bins, method, dens_color, call = call)
  plot_data <- create_plot_data(data)

  infer_plot <- ggplot(plot_data) +
    simulation_layer(data, dots = dots) +
    theoretical_layer(data, dens_color, dots = dots, do_warn = do_warn) +
    labels_layer(data, term)

  infer_plot
}

check_dots_for_deprecated <- function(dots) {
  dep_args <- c(
    "obs_stat",
    "obs_stat_color",
    "pvalue_fill",
    "direction",
    "endpoints",
    "endpoints_color",
    "ci_fill"
  )

  if (any(dep_args %in% names(dots))) {
    bad_args <- dep_args[dep_args %in% names(dots)]

    cli_warn(
      "The arguments `{list(bad_args)}` are deprecated in `visualize()` \\
       and will be ignored. They should now be passed to one of \\
       `shade_p_value()` or `shade_confidence_interval()`."
    )

    dots[!dep_args %in% names(dots)]
  }

  list(NULL)
}

check_visualize_args <- function(
  data,
  bins,
  method,
  dens_color,
  call = caller_env()
) {
  check_is_distribution(data, "visualize")
  check_type(bins, is.numeric, call = call)
  check_type(method, is.character, call = call)
  check_type(dens_color, is.character, call = call)

  if (!(method %in% c("simulation", "theoretical", "both"))) {
    cli_abort(
      'Provide `method` with one of three options: `"theoretical"`, `"both"`, \\
        or `"simulation"`. `"simulation"` is the default for simulation-based \\
        null distributions, while `"theoretical"` is the only option for \\
        null distributions outputted by `assume()`.',
      call = call
    )
  }

  if (method == "both") {
    if (!("stat" %in% names(data))) {
      cli_abort(
        '`generate()` and `calculate()` are both required to be done prior \\
          to `visualize(method = "both")`',
        call = call
      )
    }

    if (
      ("replicate" %in% names(data)) && (length(unique(data$replicate)) < 100)
    ) {
      cli_warn(
        "With only {length(unique(data$replicate))} replicates, it may be \\
         difficult to see the relationship between simulation and theory."
      )
    }
  }

  TRUE
}

# a function for checking arguments to functions that are added as layers
# to visualize()d objects to make sure they weren't mistakenly piped
check_for_piped_visualize <- function(..., call = caller_env()) {
  is_ggplot_output <- vapply(list(...), ggplot2::is_ggplot, logical(1))

  if (any(is_ggplot_output)) {
    called_function <- sys.call(-1)[[1]]

    cli_abort(
      c(
        "It looks like you piped the result of `visualize()` into \\
       `{called_function}()` rather than adding the result of \\
       `{called_function}()` as a layer with `+`.",
        i = "Consider changing `|>` (or `%>%`) to `+`."
      ),
      call = call
    )
  }

  TRUE
}

impute_endpoints <- function(endpoints, plot = NULL, call = caller_env()) {
  res <- endpoints

  if (is_fitted(endpoints)) {
    x_lab <- x_axis_label(plot)

    res <-
      endpoints |>
      dplyr::filter(term == x_lab) |>
      dplyr::select(-term)

    return(unlist(res))
  }

  if (is.vector(endpoints) && (length(endpoints) != 2)) {
    cli_warn(
      "Expecting `endpoints` to be a 1 x 2 data frame or 2 element vector. \\
       Using the first two entries as the `endpoints`."
    )
    res <- endpoints[1:2]
  }

  if (is.data.frame(endpoints)) {
    if ((nrow(endpoints) != 1) || (ncol(endpoints) != 2)) {
      cli_abort(
        "Expecting `endpoints` to be a 1 x 2 data frame or 2 element vector.",
        call = call
      )
    }

    res <- unlist(endpoints)
  }

  res |> copy_attrs(endpoints, attrs = c("se", "point_estimate"))
}

impute_obs_stat <- function(
  obs_stat,
  direction,
  endpoints,
  call = caller_env()
) {
  obs_stat <- check_obs_stat(obs_stat)

  if (
    !is.null(direction) &&
      (is.null(obs_stat) + is.null(endpoints) != 1)
  ) {
    cli_abort(
      "Shading requires either `endpoints` values for a confidence interval \\
        or the observed statistic `obs_stat` to be provided.",
      call = call
    )
  }

  obs_stat
}

simulation_layer <- function(data, dots = list(NULL)) {
  method <- get_viz_method(data)
  bins <- get_viz_bins(data)

  if (method == "theoretical") {
    return(list())
  }

  # Manual computation of breaks is needed to fix histogram shape in future plot
  # buildings, e.g. after adding p-value areas.
  bin_breaks <- compute_bin_breaks(data, bins)

  if (method == "theoretical") {
    return(list())
  }

  if (method == "simulation") {
    if (length(unique(data$stat)) >= 10) {
      res <- list(
        do.call(
          ggplot2::stat_bin,
          c(
            list(
              mapping = aes(x = stat),
              bins = bins,
              color = "white",
              breaks = bin_breaks
            ),
            dots
          )
        )
      )
    } else {
      # Probably should be removed
      res <- list(
        do.call(
          ggplot2::geom_bar,
          c(list(mapping = aes(x = stat)), dots)
        )
      )
    }
  } else if (method == "both") {
    res <- list(
      do.call(
        ggplot2::stat_bin,
        c(
          list(
            mapping = aes(x = stat, y = ggplot2::after_stat(density)),
            bins = bins,
            color = "white",
            breaks = bin_breaks
          ),
          dots
        )
      )
    )
  }

  res
}

compute_bin_breaks <- function(data, bins) {
  g <- ggplot(data) + ggplot2::stat_bin(aes(stat), bins = bins)
  g_tbl <- ggplot2::ggplot_build(g)[["data"]][[1]]

  c(g_tbl[["xmin"]][1], g_tbl[["xmax"]])
}

theoretical_layer <- function(
  data,
  dens_color,
  dots = list(NULL),
  do_warn = TRUE,
  mean_shift = 0,
  sd_shift = 1
) {
  method <- get_viz_method(data)

  if (method == "simulation") {
    return(list())
  }

  warn_theoretical_layer(data, do_warn)

  theory_type <- short_theory_type(data)

  switch(
    theory_type,
    t = theory_curve(
      method,
      dt,
      qt,
      list(df = attr(data, "distr_param")),
      dens_color,
      mean_shift = mean_shift,
      sd_shift = sd_shift
    ),
    `F` = theory_curve(
      method,
      df,
      qf,
      list(
        df1 = attr(data, "distr_param"),
        df2 = attr(data, "distr_param2")
      ),
      dens_color = dens_color
    ),
    z = theory_curve(
      method,
      dnorm,
      qnorm,
      list(),
      dens_color,
      mean_shift = mean_shift,
      sd_shift = sd_shift
    ),
    `Chi-Square` = theory_curve(
      method,
      dchisq,
      qchisq,
      list(df = attr(data, "distr_param")),
      dens_color
    )
  )
}

warn_theoretical_layer <- function(data, do_warn = TRUE, call = caller_env()) {
  if (!do_warn) {
    return(TRUE)
  }

  method <- get_viz_method(data)

  cli_warn(
    "Check to make sure the conditions have been met for the theoretical \\
     method. {.pkg infer} currently does not check these for you."
  )

  if (
    has_attr(data, "stat") &&
      !(attr(data, "stat") %in% c("t", "z", "Chisq", "F"))
  ) {
    if (method == "theoretical") {
      cli_warn(
        "Your `calculate`d statistic and the theoretical distribution are on \\
         different scales. Displaying only the theoretical distribution."
      )
    } else if (method == "both") {
      cli_abort(
        "Your `calculate`d statistic and the theoretical distribution are on \\
         different scales. Use a standardized `stat` instead.",
        call = call
      )
    }
  }
}

theory_curve <- function(
  method,
  d_fun,
  q_fun,
  args_list,
  dens_color,
  mean_shift = 0,
  sd_shift = 1
) {
  if (method == "theoretical") {
    d_fun_ <- shift_d_fun(d_fun, mean_shift, sd_shift)

    x_range <- (do.call(q_fun, c(p = list(c(0.001, 0.999)), args_list)) *
      sd_shift) +
      mean_shift

    res <- list(
      ggplot2::geom_path(
        data = data.frame(x = x_range),
        mapping = aes(x = x),
        stat = "function",
        fun = d_fun_,
        args = args_list,
        color = dens_color
      )
    )
  } else if (method == "both") {
    res <- list(
      ggplot2::geom_path(
        mapping = aes(x = stat),
        stat = "function",
        fun = d_fun,
        args = args_list,
        color = dens_color
      )
    )
  }

  res
}

shift_d_fun <- function(d_fun_, mean_shift, sd_shift) {
  function(x, ...) {
    d_fun_(x = (x - mean_shift) / sd_shift, ...)
  }
}

# when adding a confidence interval layer, rescale the theoretical
# layer
redraw_theory_layer <- function(plot, mean_shift, sd_shift) {
  plot_data <- plot[["plot_env"]][["data"]]

  plot[["layers"]] <-
    theoretical_layer(
      data = plot_data,
      dens_color = plot[["plot_env"]][["dens_color"]],
      dots = plot[["plot_env"]][["dots"]],
      do_warn = plot[["plot_env"]][["do_warn"]],
      mean_shift = mean_shift,
      sd_shift = sd_shift
    )

  plot
}


title_layer <- function(data, title_fn = function(x) labs(title = x)) {
  method <- get_viz_method(data)
  theory_type <- short_theory_type(data)

  if (is_hypothesized(data) || inherits(data, "infer_dist")) {
    distr_name <- "Null Distribution"
  } else {
    distr_name <- switch(
      attr(data, "type"),
      bootstrap = "Bootstrap Distribution",
      # For other generation types there will be no distribution adjective.
      # However, currently they seem to be never used without `hypothesize()`
      # step.
      "Distribution"
    )
  }

  if (is_fitted(data)) {
    plural <- "s"
  } else {
    plural <- ""
  }

  title_string <- switch(
    method,
    simulation = "Simulation-Based {distr_name}{plural}",
    theoretical = "Theoretical {theory_type} {distr_name}{plural}",
    both = "Simulation-Based and Theoretical {theory_type} {distr_name}s"
  )

  list(title_fn(glue(title_string, .null = "NULL")))
}

labels_layer <- function(data, term) {
  method <- get_viz_method(data)
  theory_type <- short_theory_type(data)

  x_lab <- switch(method, simulation = "{term}", "{theory_type} stat")
  y_lab <- switch(method, simulation = "count", "density")

  labs(
    x = glue(x_lab, .null = "NULL"),
    y = glue(y_lab, .null = "NULL")
  )
}

facet_layer <- function() {
  list(
    ggplot2::facet_wrap(~term, scales = "free_x")
  )
}

check_shade_confidence_interval_args <- function(
  color,
  fill,
  call = caller_env()
) {
  check_type(color, is_color_string, "color string", call = call)
  if (!is.null(fill)) {
    check_type(fill, is_color_string, "color string", call = call)
  }
}

short_theory_type <- function(x) {
  theory_attr <- attr(x, "theory_type")

  if (!has_attr(x, "theory_type")) {
    return("")
  }

  theory_types <- list(
    t = c("Two sample t", "Slope with t", "One sample t"),
    `F` = "ANOVA",
    z = c("One sample prop z", "Two sample props z"),
    `Chi-Square` = c("Chi-square test of indep", "Chi-square Goodness of Fit")
  )

  is_type <- vapply(
    theory_types,
    function(x) {
      theory_attr %in% x
    },
    logical(1)
  )

  names(theory_types)[which(is_type)[1]]
}

get_viz_method <- function(data) {
  attr(data, "viz_method")
}

get_viz_bins <- function(data) {
  attr(data, "viz_bins")
}

#' @method ggplot_add infer_layer
#' @export
ggplot_add.infer_layer <- function(object, plot, ...) {
  # a method for the `+` operator for infer objects.
  # - "object to add" (arguments to the RHS of the `+`)
  # - plot is the existing plot (on the LHS of the `+`)
  # - object_name is the unevaluated call on the RHS of the `+`
  #
  # output is the actual output of the addition - this allows for
  # a more |>-esque programming style
  #
  # the biggest advantage this offers us is that we can
  # overwrite existing elements, i.e. subsetting into the patchwork,
  # modifying its elements (for p-value and confidence interval shading),
  # and then overwriting them.
  #
  # both shade_p_value and shade_confidence_interval now just dispatch here
  # and execute term-wise along a patchwork object, so "object" is only a
  # stand-in classed object that sends to the right place

  # process object_name (shade_* call) ----------------------------------
  shade_fn <- attr(object, "fn")
  shade_args <- attributes(object)[
    !names(attributes(object)) %in%
      c("class", "fn")
  ]
  shade_args["fill"] <- shade_args[["fill"]]

  # if a patchwork object, use a custom `infer_layer` `+.gg` method.
  # otherwise, convert the `infer_layer` back to a list and call `+` again.
  if (inherits(plot, "patchwork")) {
    # use a for loop to invoke the `[[.patchwork` method
    n_patches <- length(plot$patches$plots) + 1

    new_plot <- plot

    for (i in 1:n_patches) {
      args <- shade_args
      args[["plot"]] <- plot[[i]]

      new_plot[[i]] <-
        do.call(
          paste0(shade_fn, "_term"),
          args
        )
    }
  } else {
    args <- shade_args
    args[["plot"]] <- plot

    new_plot <-
      do.call(
        paste0(shade_fn, "_term"),
        args
      )
  }

  new_plot
}

# extract the x axis label from a ggplot -- these are unique
# ids for terms in visualize() workflows
x_axis_label <- function(x) {
  x |> purrr::pluck("labels", "x")
}

create_plot_data <- function(data) {
  if (inherits(data, "infer_dist")) {
    res <- tibble::tibble() |>
      copy_attrs(
        data,
        c("theory_type", "distr_param", "distr_param2", "viz_method")
      )
  } else {
    res <- data
  }

  res
}


================================================
FILE: R/wrappers.R
================================================
# Wrapper functions
# Different shortcuts to doing traditional hypothesis tests & confidence
# intervals in R as well as calculating test statistics, following a pipe-able
# framework

#' Tidy t-test
#'
#' @description
#'
#' A tidier version of [t.test()][stats::t.test()] for two sample tests.
#'
#' @param x A data frame that can be coerced into a [tibble][tibble::tibble].
#' @inheritParams specify
#' @param order A string vector of specifying the order in which the levels of
#'   the explanatory variable should be ordered for subtraction, where `order =
#'   c("first", "second")` means `("first" - "second")`.
#' @param alternative Character string giving the direction of the alternative
#'   hypothesis. Options are `"two-sided"` (default), `"greater"`, or `"less"`.
#' @param mu A numeric value giving the hypothesized null mean value for a one
#'   sample test and the hypothesized difference for a two sample test.
#' @param conf_int A logical value for whether to include the confidence
#'   interval or not. `TRUE` by default.
#' @param conf_level A numeric value between 0 and 1. Default value is 0.95.
#' @param ... For passing in other arguments to [t.test()][stats::t.test()].
#'
#' @examples
#' library(tidyr)
#'
#' # t test for number of hours worked per week
#' # by college degree status
#' gss |>
#'    tidyr::drop_na(college) |>
#'    t_test(formula = hours ~ college,
#'       order = c("degree", "no degree"),
#'       alternative = "two-sided")
#'
#' # see vignette("infer") for more explanation of the
#' # intuition behind the infer package, and vignette("t_test")
#' # for more examples of t-tests using infer
#'
#' @importFrom rlang f_lhs
#' @importFrom rlang f_rhs
#' @importFrom rlang new_formula
#' @importFrom stats as.formula
#' @family wrapper functions
#' @export
t_test <- function(
  x,
  formula,
  response = NULL,
  explanatory = NULL,
  order = NULL,
  alternative = "two-sided",
  mu = 0,
  conf_int = TRUE,
  conf_level = 0.95,
  ...
) {
  check_conf_level(conf_level)

  # convert all character and logical variables to be factor variables
  x <- standardize_variable_types(x)

  # parse response and explanatory variables
  response <- enquo(response)
  explanatory <- enquo(explanatory)
  x <- parse_variables(
    x = x,
    formula = formula,
    response = response,
    explanatory = explanatory
  )

  # match with old "dot" syntax in t.test
  if (alternative %in% c("two-sided", "two_sided", "two sided", "two.sided")) {
    alternative <- "two.sided"
  }

  # two sample
  if (has_explanatory(x)) {
    order <- check_order(x, order, in_calculate = FALSE, stat = NULL)
    x <- reorder_explanatory(x, order)
    prelim <- stats::t.test(
      formula = new_formula(response_expr(x), explanatory_expr(x)),
      data = x,
      alternative = alternative,
      mu = mu,
      conf.level = conf_level,
      ...
    ) |>
      broom::glance()
  } else {
    # one sample
    prelim <- stats::t.test(
      response_variable(x),
      alternative = alternative,
      mu = mu,
      conf.level = conf_level
    ) |>
      broom::glance()
  }

  if (conf_int) {
    results <- prelim |>
      dplyr::select(
        statistic,
        t_df = parameter,
        p_value = p.value,
        alternative,
        estimate,
        lower_ci = conf.low,
        upper_ci = conf.high
      )
  } else {
    results <- prelim |>
      dplyr::select(
        statistic,
        t_df = parameter,
        p_value = p.value,
        alternative,
        estimate
      )
  }

  results
}

#' Tidy t-test statistic
#'
#' @description
#'
#' A shortcut wrapper function to get the observed test statistic for a t test.
#' This function has been deprecated in favor of the more general [observe()].
#'
#' @param x A data frame that can be coerced into a [tibble][tibble::tibble].
#' @inheritParams specify
#' @param order A string vector of specifying the order in which the levels of
#'   the explanatory variable should be ordered for subtraction, where `order =
#'   c("first", "second")` means `("first" - "second")`.
#' @param mu A numeric value giving the hypothesized null mean value for a one
#'   sample test and the hypothesized difference for a two sample test.
#' @inheritParams t_test
#' @param ... Pass in arguments to infer functions.
#'
#' @examples
#' library(tidyr)
#'
#' # t test statistic for true mean number of hours worked
#' # per week of 40
#' gss |>
#'    t_stat(response = hours, mu = 40)
#'
#' # t test statistic for number of hours worked per week
#' # by college degree status
#' gss |>
#'    tidyr::drop_na(college) |>
#'    t_stat(formula = hours ~ college,
#'       order = c("degree", "no degree"),
#'       alternative = "two-sided")
#'
#' @family wrapper functions
#' @family functions for calculating observed statistics
#' @export
t_stat <- function(
  x,
  formula,
  response = NULL,
  explanatory = NULL,
  order = NULL,
  alternative = "two-sided",
  mu = 0,
  conf_int = FALSE,
  conf_level = 0.95,
  ...
) {
  lifecycle::deprecate_warn(
    when = "1.0.0",
    what = "t_stat()",
    with = "observe()"
  )

  check_conf_level(conf_level)

  # convert all character and logical variables to be factor variables
  x <- standardize_variable_types(x)

  # parse response and explanatory variables
  response <- enquo(response)
  explanatory <- enquo(explanatory)
  x <- parse_variables(
    x = x,
    formula = formula,
    response = response,
    explanatory = explanatory
  )

  # match with old "dot" syntax in t.test
  if (alternative %in% c("two-sided", "two_sided", "two sided", "two.sided")) {
    alternative <- "two.sided"
  }

  # two sample
  if (has_explanatory(x)) {
    order <- check_order(x, order, in_calculate = FALSE, stat = NULL)
    x <- reorder_explanatory(x, order)
    prelim <- stats::t.test(
      formula = new_formula(response_expr(x), explanatory_expr(x)),
      data = x,
      alternative = alternative,
      mu = mu,
      conf.level = conf_level,
      ...
    ) |>
      broom::glance()
  } else {
    # one sample
    prelim <- stats::t.test(
      response_variable(x),
      alternative = alternative,
      mu = mu,
      conf.level = conf_level
    ) |>
      broom::glance()
  }

  # removed unnecessary if(conf_int) clause; only the statistic itself
  # was returned regardless
  results <- prelim |>
    dplyr::select(statistic) |>
    pull()

  results
}

#' Tidy chi-squared test
#'
#' @description
#'
#' A tidier version of [chisq.test()][stats::chisq.test()] for goodness of fit
#' tests and tests of independence.
#'
#' @param x A data frame that can be coerced into a [tibble][tibble::tibble].
#' @inheritParams specify
#' @param ... Additional arguments for [chisq.test()][stats::chisq.test()].
#'
#' @examples
#' # chi-squared test of independence for college completion
#' # status depending on one's self-identified income class
#' chisq_test(gss, college ~ finrela)
#'
#' # chi-squared goodness of fit test on whether self-identified
#' # income class follows a uniform distribution
#' chisq_test(gss,
#'            response = finrela,
#'            p = c("far below average" = 1/6,
#'                  "below average" = 1/6,
#'                  "average" = 1/6,
#'                  "above average" = 1/6,
#'                  "far above average" = 1/6,
#'                  "DK" = 1/6))
#'
#' @family wrapper functions
#' @export
chisq_test <- function(x, formula, response = NULL, explanatory = NULL, ...) {
  # Parse response and explanatory variables
  response <- enquo(response)
  explanatory <- enquo(explanatory)

  x <- standardize_variable_types(x)

  x <- parse_variables(
    x = x,
    formula = formula,
    response = response,
    explanatory = explanatory
  )

  if (!(class(response_variable(x)) %in% c("logical", "character", "factor"))) {
    cli_abort(
      'The response variable of `{response_name(x)}` is not appropriate \\
       since the response variable is expected to be categorical.'
    )
  }
  if (
    has_explanatory(x) &&
      !(class(explanatory_variable(x)) %in% c("logical", "character", "factor"))
  ) {
    cli_abort(
      'The explanatory variable of `{explanatory_name(x)}` is not appropriate \\
       since the explanatory variable is expected to be categorical.'
    )
  }

  x <- x |>
    select(any_of(c(response_name(x), explanatory_name(x))))

  stats::chisq.test(table(x), ...) |>
    broom::glance() |>
    dplyr::select(statistic, chisq_df = parameter, p_value = p.value)
}

#' Tidy chi-squared test statistic
#'
#'  @description
#'
#' A shortcut wrapper function to get the observed test statistic for a chisq
#' test. Uses [chisq.test()][stats::chisq.test()], which applies a continuity
#' correction. This function has been deprecated in favor of the more
#' general [observe()].
#'
#' @param x A data frame that can be coerced into a [tibble][tibble::tibble].
#' @inheritParams specify
#' @param ... Additional arguments for [chisq.test()][stats::chisq.test()].
#'
#' @examples
#' # chi-squared test statistic for test of independence
#' # of college completion status depending and one's
#' # self-identified income class
#' chisq_stat(gss, college ~ finrela)
#'
#' # chi-squared test statistic for a goodness of fit
#' # test on whether self-identified income class
#' # follows a uniform distribution
#' chisq_stat(gss,
#'            response = finrela,
#'            p = c("far below average" = 1/6,
#'                  "below average" = 1/6,
#'                  "average" = 1/6,
#'                  "above average" = 1/6,
#'                  "far above average" = 1/6,
#'                  "DK" = 1/6))
#'
#' @family wrapper functions
#' @family functions for calculating observed statistics
#' @export
chisq_stat <- function(x, formula, response = NULL, explanatory = NULL, ...) {
  lifecycle::deprecate_warn(
    when = "1.0.0",
    what = "chisq_stat()",
    with = "observe()"
  )

  # Parse response and explanatory variables
  response <- enquo(response)
  explanatory <- enquo(explanatory)
  x <- standardize_variable_types(x)

  x <- parse_variables(
    x = x,
    formula = formula,
    response = response,
    explanatory = explanatory
  )

  if (!(class(response_variable(x)) %in% c("logical", "character", "factor"))) {
    cli_abort(
      'The response variable of `{response_name(x)}` is not appropriate \\
       since the response variable is expected to be categorical.'
    )
  }
  if (
    has_explanatory(x) &&
      !(class(explanatory_variable(x)) %in% c("logical", "character", "factor"))
  ) {
    cli_abort(
      'The explanatory variable of `{explanatory_name(x)}` is not appropriate \\
       since the response variable is expected to be categorical.'
    )
  }

  x <- x |>
    select(any_of(c(response_name(x), explanatory_name(x))))

  suppressWarnings(stats::chisq.test(table(x), ...)) |>
    broom::glance() |>
    dplyr::select(statistic) |>
    pull()
}

check_conf_level <- function(conf_level, call = caller_env()) {
  if (
    (!inherits(conf_level, "numeric")) | (conf_level < 0) | (conf_level > 1)
  ) {
    cli_abort(
      "The `conf_level` argument must be a number between 0 and 1.",
      call = call
    )
  }
}

#' Tidy proportion test
#'
#' @description
#'
#' A tidier version of [prop.test()][stats::prop.test()] for equal or given
#' proportions.
#'
#' @param x A data frame that can be coerced into a [tibble][tibble::tibble].
#' @inheritParams specify
#' @param order A string vector specifying the order in which the proportions
#'   should be subtracted, where  `order = c("first", "second")` means
#'   `"first" - "second"`. Ignored for one-sample tests, and optional for two
#'   sample tests.
#' @param alternative Character string giving the direction of the alternative
#'   hypothesis. Options are `"two-sided"` (default), `"greater"`, or `"less"`.
#'   Only used when testing the null that a single proportion equals a given
#'   value, or that two proportions are equal; ignored otherwise.
#' @param p A numeric vector giving the hypothesized null proportion of
#' success for each group.
#' @inheritParams t_test
#' @param success The level of `response` that will be considered a success, as
#'   a string. Only used when testing the null that a single
#'   proportion equals a given value, or that two proportions are equal;
#'   ignored otherwise.
#' @param correct A logical indicating whether Yates' continuity correction
#'   should be applied where possible. If `z = TRUE`, the `correct` argument will
#'   be overwritten as `FALSE`. Otherwise defaults to `correct = TRUE`.
#' @param z A logical value for whether to report the statistic as a standard
#'   normal deviate or a Pearson's chi-square statistic. \eqn{z^2}  is distributed
#'   chi-square with 1 degree of freedom, though note that the user will likely
#'   need to turn off Yates' continuity correction by setting `correct = FALSE`
#'   to see this connection.
#' @param ... Additional arguments for [prop.test()][stats::prop.test()].
#'
#' @details
#' When testing with an explanatory variable with more than two levels, the
#' `order` argument as used in the package is no longer well-defined. The function
#' will thus raise a warning and ignore the value if supplied a non-NULL `order`
#' argument.
#'
#' The columns present in the output depend on the output of both [prop.test()]
#' and [broom::glance.htest()]. See the latter's documentation for column
#' definitions; columns have been renamed with the following mapping:
#'
#' * `chisq_df` = `parameter`
#' * `p_value` = `p.value`
#' * `lower_ci` = `conf.low`
#' * `upper_ci` = `conf.high`
#'
#' @examples
#' # two-sample proportion test for difference in proportions of
#' # college completion by respondent sex
#' prop_test(gss,
#'           college ~ sex,
#'           order = c("female", "male"))
#'
#' # one-sample proportion test for hypothesized null
#' # proportion of college completion of .2
#' prop_test(gss,
#'           college ~ NULL,
#'           p = .2)
#'
#' # report as a z-statistic rather than chi-square
#' # and specify the success level of the response
#' prop_test(gss,
#'           college ~ NULL,
#'           success = "degree",
#'           p = .2,
#'           z = TRUE)
#'
#' @family wrapper functions
#' @export
prop_test <- function(
  x,
  formula,
  response = NULL,
  explanatory = NULL,
  p = NULL,
  order = NULL,
  alternative = "two-sided",
  conf_int = TRUE,
  conf_level = 0.95,
  success = NULL,
  correct = NULL,
  z = FALSE,
  ...
) {
  # Parse response and explanatory variables
  response <- enquo(response)
  explanatory <- enquo(explanatory)
  x <- standardize_variable_types(x)

  x <- parse_variables(
    x = x,
    formula = formula,
    response = response,
    explanatory = explanatory
  )

  correct <- if (z) {
    FALSE
  } else if (is.null(correct)) {
    TRUE
  } else {
    correct
  }

  if (!(class(response_variable(x)) %in% c("logical", "character", "factor"))) {
    cli_abort(
      'The response variable of `{response_name(x)}` is not appropriate \\
      since the response variable is expected to be categorical.'
    )
  }
  if (
    has_explanatory(x) &&
      !(class(explanatory_variable(x)) %in% c("logical", "character", "factor"))
  ) {
    cli_abort(
      'The explanatory variable of `{explanatory_name(x)}` is not appropriate \\
       since the explanatory variable is expected to be categorical.'
    )
  }
  # match with old "dot" syntax in t.test
  if (alternative %in% c("two-sided", "two_sided", "two sided", "two.sided")) {
    alternative <- "two.sided"
  }

  # process "success" arg
  lvls <- levels(factor(response_variable(x)))

  if (length(lvls) > 2) {
    cli_abort(
      "This test is not defined for response variables with more than 2 levels."
    )
  }

  if (!is.null(success)) {
    check_type(success, rlang::is_string)

    if (!(success %in% lvls)) {
      cli_abort('{success} is not a valid level of {response_name(x)}.')
    }

    lvls <- c(success, lvls[lvls != success])
  } else {
    success <- lvls[1]
  }

  # two sample
  if (has_explanatory(x)) {
    # make a summary table to supply to prop.test
    sum_table <- x |>
      select(explanatory_name(x), response_name(x)) |>
      table()

    length_exp_levels <- length(levels(explanatory_variable(x)))
    if (length_exp_levels == 2) {
      order <- check_order(x, order, in_calculate = FALSE, stat = NULL)
      # reorder according to the order and success arguments
      sum_table <- sum_table[order, lvls]
    } else if (length_exp_levels >= 3 && !is.null(order)) {
      cli_warn(c(
        "The `order` argument will be ignored as it is not well-defined \\
             for explanatory variables with more than 2 levels. ",
        i = "To silence this message, avoid passing the `order` argument."
      ))
      # reorder according to the success argument
      sum_table <- sum_table[, lvls]
    }

    prelim <- stats::prop.test(
      x = sum_table,
      alternative = alternative,
      conf.level = conf_level,
      p = p,
      correct = correct,
      ...
    )
  } else {
    # one sample
    response_tbl <- response_variable(x) |>
      factor() |>
      stats::relevel(success) |>
      table()

    if (is.null(p)) {
      cli_inform(
        "No `p` argument was hypothesized, so the test will \\
         assume a null hypothesis `p = .5`."
      )
    }

    prelim <- stats::prop.test(
      x = response_tbl,
      alternative = alternative,
      conf.level = conf_level,
      p = p,
      correct = correct,
      ...
    )
  }

  if (length(prelim$estimate) <= 2) {
    if (conf_int & is.null(p)) {
      results <- prelim |>
        broom::glance() |>
        dplyr::select(
          statistic,
          chisq_df = parameter,
          p_value = p.value,
          alternative,
          lower_ci = conf.low,
          upper_ci = conf.high
        )
    } else {
      results <- prelim |>
        broom::glance() |>
        dplyr::select(
          statistic,
          chisq_df = parameter,
          p_value = p.value,
          alternative
        )
    }
  } else {
    results <- prelim |>
      broom::glance() |>
      dplyr::select(statistic, chisq_df = parameter, p_value = p.value)
  }

  if (z) {
    results <- calculate_z(x, results, success, p, order)
  }

  results
}

calculate_z <- function(x, results, success, p, order) {
  exp <- if (has_explanatory(x)) {
    explanatory_expr(x)
  } else {
    NULL
  }

  form <- new_formula(response_expr(x), exp)

  stat <- x |>
    specify(formula = form, success = success) |>
    hypothesize(
      null = if (has_explanatory(x)) {
        "independence"
      } else {
        "point"
      },
      p = if (is.null(p) && !has_explanatory(x)) {
        .5
      } else {
        p
      }
    ) |>
    calculate(
      stat = "z",
      order = if (has_explanatory(x)) {
        order
      } else {
        NULL
      }
    ) |>
    dplyr::pull()

  results$statistic <- stat
  results$chisq_df <- NULL

  results
}


================================================
FILE: README.Rmd
================================================
---
output: github_document
---

# infer R Package <img src="man/figures/logo.png" alt="A hexagonal logo. A green silhouette of a fir tree sits atop black text, reading 'infer'. The logo has a background in two shades of blue, resembling the Oregon license plate." align="right" width=280 />


<!--figs/infer.svg-->
<!--http://www.r-pkg.org/badges/version/infer-->
<!--figs/main.svg-->
<!--https://img.shields.io/codecov/c/github/tidymodels/infer/main.svg-->

[![R-CMD-check](https://github.com/tidymodels/infer/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/tidymodels/infer/actions/workflows/R-CMD-check.yaml)
[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/infer)](https://cran.r-project.org/package=infer)
[![Codecov test coverage](https://codecov.io/gh/tidymodels/infer/graph/badge.svg)](https://app.codecov.io/gh/tidymodels/infer)

The objective of this package is to perform statistical inference using an expressive statistical grammar that coheres with the tidyverse design framework. The package is centered around 4 main verbs, supplemented with many utilities to visualize and extract value from their outputs.

+ `specify()` allows you to specify the variable, or relationship between variables, that you're interested in.
+ `hypothesize()` allows you to declare the null hypothesis.
+ `generate()` allows you to generate data reflecting the null hypothesis.
+ `calculate()` allows you to calculate a distribution of statistics from the generated data to form the null distribution.

To learn more about the principles underlying the package design, see `vignette("infer")`.

```{r load-package, echo = FALSE, message = FALSE, warning = FALSE}
library(infer)
```

```{r diagram, echo = FALSE, fig.cap = " ", fig.alt = "A diagram showing four steps to carry out randomization-based inference: specify hypothesis, generate data, calculate statistic, and visualize. From left to right, each step is connected by an arrow, while the diagram indicates that generating data and calculating statistics can happen iteratively."}
knitr::include_graphics("https://raw.githubusercontent.com/tidymodels/infer/main/figs/ht-diagram.png")
```

If you're interested in learning more about randomization-based statistical inference generally, including applied examples of this package, we recommend checking out [Statistical Inference Via Data Science: A ModernDive Into R and the Tidyverse](https://moderndive.com/v2/) and [Introduction to Modern Statistics](https://openintro-ims.netlify.app/).

### Installation

------------------------------------------------------------------------

To install the current stable version of infer from CRAN:

```{r, eval = FALSE}
install.packages("infer")
```

To install the developmental stable version of infer, make sure to install remotes first. The pkgdown website for this version is at [infer.tidymodels.org](https://infer.tidymodels.org/).

```{r, eval = FALSE}
# install.packages("pak")
pak::pak("tidymodels/infer")
```

### Contributing

------------------------------------------------------------------------

We welcome others helping us make this package as user-friendly and efficient as possible. Please review our [contributing](https://github.com/tidymodels/infer/blob/main/CONTRIBUTING.md) and [conduct](https://github.com/tidymodels/infer/blob/main/.github/CODE_OF_CONDUCT.md) guidelines. By participating in this project you agree to abide by its terms.

For questions and discussions about tidymodels packages, modeling, and machine learning, please [post on Posit Community](https://forum.posit.co/new-topic?category_id=15&tags=tidymodels,question). If you think you have encountered a bug, please [submit an issue](https://github.com/tidymodels/infer/issues). Either way, learn how to create and share a [reprex](https://reprex.tidyverse.org/articles/learn-reprex.html) (a minimal, reproducible example), to clearly communicate about your code. Check out further details on [contributing guidelines for tidymodels packages](https://www.tidymodels.org/contribute/) and [how to get help](https://www.tidymodels.org/help/).

### Examples

------------------------------------------------------------------------

These examples are pulled from the "Full infer Pipeline Examples" vignette, accessible by calling `vignette("observed_stat_examples")`. They make use of the `gss` dataset supplied by the package, providing a sample of data from the [General Social Survey](https://gss.norc.org). The data looks like this:

```{r load-gss, warning = FALSE, message = FALSE}
# load in the dataset
data(gss)

# take a glimpse at it
str(gss)
```

As an example, we'll run an analysis of variance on `age` and `partyid`, testing whether the age of a respondent is independent of their political party affiliation.

Calculating the observed statistic,

```{r, message = FALSE, warning = FALSE}
F_hat <- gss |> 
  specify(age ~ partyid) |>
  calculate(stat = "F")
```

Then, generating the null distribution,

```{r, message = FALSE, warning = FALSE}
null_dist <- gss |>
   specify(age ~ partyid) |>
   hypothesize(null = "independence") |>
   generate(reps = 1000, type = "permute") |>
   calculate(stat = "F")
```

Visualizing the observed statistic alongside the null distribution,

```{r viz, message = FALSE, warning = FALSE, eval = FALSE}
visualize(null_dist) +
  shade_p_value(obs_stat = F_hat, direction = "greater")
```

```{r viz-graphic, message = FALSE, warning = FALSE, echo = FALSE, fig.cap = " ", fig.alt = "A histogram showing a distribution of F statistics, right-tailed and centered around one. The x axis ranges from zero to five. The region of the histogram to the right of the observed statistic, just above two, is shaded red to represent the p-value."}
knitr::include_graphics("https://raw.githubusercontent.com/tidymodels/infer/main/README_files/figure-gfm/viz-1.png")
```

Calculating the p-value from the null distribution and observed statistic,

```{r, message = FALSE, warning = FALSE}
null_dist |>
  get_p_value(obs_stat = F_hat, direction = "greater")
```


Note that the formula and non-formula interfaces  (i.e., `age ~ partyid` vs. `response = age, explanatory =  partyid`) work for all implemented inference procedures in `infer`. Use whatever is more natural for you. If you will be doing modeling using functions like `lm()` and `glm()`, though, we recommend you begin to use the formula `y ~ x` notation as soon as possible.

Other resources are available in the package vignettes! See `vignette("observed_stat_examples")` for more examples like the one above, and `vignette("infer")` for discussion of the underlying principles of the package design.


================================================
FILE: README.md
================================================

# infer R Package <img src="man/figures/logo.png" alt="A hexagonal logo. A green silhouette of a fir tree sits atop black text, reading 'infer'. The logo has a background in two shades of blue, resembling the Oregon license plate." align="right" width=280 />

<!--figs/infer.svg-->

<!--http://www.r-pkg.org/badges/version/infer-->

<!--figs/main.svg-->

<!--https://img.shields.io/codecov/c/github/tidymodels/infer/main.svg-->

[![R-CMD-check](https://github.com/tidymodels/infer/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/tidymodels/infer/actions/workflows/R-CMD-check.yaml)
[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/infer)](https://cran.r-project.org/package=infer)
[![Codecov test
coverage](https://codecov.io/gh/tidymodels/infer/graph/badge.svg)](https://app.codecov.io/gh/tidymodels/infer)

The objective of this package is to perform statistical inference using
an expressive statistical grammar that coheres with the tidyverse design
framework. The package is centered around 4 main verbs, supplemented
with many utilities to visualize and extract value from their outputs.

- `specify()` allows you to specify the variable, or relationship
  between variables, that you’re interested in.
- `hypothesize()` allows you to declare the null hypothesis.
- `generate()` allows you to generate data reflecting the null
  hypothesis.
- `calculate()` allows you to calculate a distribution of statistics
  from the generated data to form the null distribution.

To learn more about the principles underlying the package design, see
`vignette("infer")`.

<div class="figure">

<img src="https://raw.githubusercontent.com/tidymodels/infer/main/figs/ht-diagram.png" alt="A diagram showing four steps to carry out randomization-based inference: specify hypothesis, generate data, calculate statistic, and visualize. From left to right, each step is connected by an arrow, while the diagram indicates that generating data and calculating statistics can happen iteratively."  />
<p class="caption">

</p>

</div>

If you’re interested in learning more about randomization-based
statistical inference generally, including applied examples of this
package, we recommend checking out [Statistical Inference Via Data
Science: A ModernDive Into R and the
Tidyverse](https://moderndive.com/v2/) and [Introduction to Modern
Statistics](https://openintro-ims.netlify.app/).

### Installation

------------------------------------------------------------------------

To install the current stable version of infer from CRAN:

``` r
install.packages("infer")
```

To install the developmental stable version of infer, make sure to
install remotes first. The pkgdown website for this version is at
[infer.tidymodels.org](https://infer.tidymodels.org/).

``` r
# install.packages("pak")
pak::pak("tidymodels/infer")
```

### Contributing

------------------------------------------------------------------------

We welcome others helping us make this package as user-friendly and
efficient as possible. Please review our
[contributing](https://github.com/tidymodels/infer/blob/main/CONTRIBUTING.md)
and
[conduct](https://github.com/tidymodels/infer/blob/main/.github/CODE_OF_CONDUCT.md)
guidelines. By participating in this project you agree to abide by its
terms.

For questions and discussions about tidymodels packages, modeling, and
machine learning, please [post on Posit
Community](https://forum.posit.co/new-topic?category_id=15&tags=tidymodels,question).
If you think you have encountered a bug, please [submit an
issue](https://github.com/tidymodels/infer/issues). Either way, learn
how to create and share a
[reprex](https://reprex.tidyverse.org/articles/learn-reprex.html) (a
minimal, reproducible example), to clearly communicate about your code.
Check out further details on [contributing guidelines for tidymodels
packages](https://www.tidymodels.org/contribute/) and [how to get
help](https://www.tidymodels.org/help/).

### Examples

------------------------------------------------------------------------

These examples are pulled from the “Full infer Pipeline Examples”
vignette, accessible by calling `vignette("observed_stat_examples")`.
They make use of the `gss` dataset supplied by the package, providing a
sample of data from the [General Social Survey](https://gss.norc.org).
The data looks like this:

``` r
# load in the dataset
data(gss)

# take a glimpse at it
str(gss)
```

    ## tibble [500 × 11] (S3: tbl_df/tbl/data.frame)
    ##  $ year   : num [1:500] 2014 1994 1998 1996 1994 ...
    ##  $ age    : num [1:500] 36 34 24 42 31 32 48 36 30 33 ...
    ##  $ sex    : Factor w/ 2 levels "male","female": 1 2 1 1 1 2 2 2 2 2 ...
    ##  $ college: Factor w/ 2 levels "no degree","degree": 2 1 2 1 2 1 1 2 2 1 ...
    ##  $ partyid: Factor w/ 5 levels "dem","ind","rep",..: 2 3 2 2 3 3 1 2 3 1 ...
    ##  $ hompop : num [1:500] 3 4 1 4 2 4 2 1 5 2 ...
    ##  $ hours  : num [1:500] 50 31 40 40 40 53 32 20 40 40 ...
    ##  $ income : Ord.factor w/ 12 levels "lt $1000"<"$1000 to 2999"<..: 12 11 12 12 12 12 12 12 12 10 ...
    ##  $ class  : Factor w/ 6 levels "lower class",..: 3 2 2 2 3 3 2 3 3 2 ...
    ##  $ finrela: Factor w/ 6 levels "far below average",..: 2 2 2 4 4 3 2 4 3 1 ...
    ##  $ weight : num [1:500] 0.896 1.083 0.55 1.086 1.083 ...

As an example, we’ll run an analysis of variance on `age` and `partyid`,
testing whether the age of a respondent is independent of their
political party affiliation.

Calculating the observed statistic,

``` r
F_hat <- gss |> 
  specify(age ~ partyid) |>
  calculate(stat = "F")
```

Then, generating the null distribution,

``` r
null_dist <- gss |>
   specify(age ~ partyid) |>
   hypothesize(null = "independence") |>
   generate(reps = 1000, type = "permute") |>
   calculate(stat = "F")
```

Visualizing the observed statistic alongside the null distribution,

``` r
visualize(null_dist) +
  shade_p_value(obs_stat = F_hat, direction = "greater")
```

<div class="figure">

<img src="https://raw.githubusercontent.com/tidymodels/infer/main/README_files/figure-gfm/viz-1.png" alt="A histogram showing a distribution of F statistics, right-tailed and centered around one. The x axis ranges from zero to five. The region of the histogram to the right of the observed statistic, just above two, is shaded red to represent the p-value."  />
<p class="caption">

</p>

</div>

Calculating the p-value from the null distribution and observed
statistic,

``` r
null_dist |>
  get_p_value(obs_stat = F_hat, direction = "greater")
```

    ## # A tibble: 1 × 1
    ##   p_value
    ##     <dbl>
    ## 1   0.053

Note that the formula and non-formula interfaces (i.e., `age ~ partyid`
vs. `response = age, explanatory =  partyid`) work for all implemented
inference procedures in `infer`. Use whatever is more natural for you.
If you will be doing modeling using functions like `lm()` and `glm()`,
though, we recommend you begin to use the formula `y ~ x` notation as
soon as possible.

Other resources are available in the package vignettes! See
`vignette("observed_stat_examples")` for more examples like the one
above, and `vignette("infer")` for discussion of the underlying
principles of the package design.


================================================
FILE: _pkgdown.yml
================================================
url: https://infer.tidymodels.org

template:
  package: tidytemplate
  bootstrap: 5
  bslib:
    danger: "#CA225E"
    primary: "#CA225E"
  includes:
    in_header: |
      <script src="https://cdn.jsdelivr.net/gh/posit-dev/supported-by-posit/js/badge.min.js" data-max-height="43" data-light-bg="#666f76" data-light-fg="#f9f9f9"></script>
      <script defer data-domain="infer.tidymodels.org,all.tidymodels.org" src="https://plausible.io/js/plausible.js"></script>

figures:
  fig.width: 8
  fig.height: 5.75

reference:
- title: Core Verbs
  contents:
  - specify
  - hypothesize
  - generate
  - calculate
  - fit.infer
  - assume
- title: Helpers
  contents:
  - visualize
  - get_p_value
  - get_confidence_interval
  - shade_p_value
  - shade_confidence_interval
- title: Wrappers
  contents:
  - observe
  - ends_with("_test")
  - ends_with("_stat")
- title: Miscellaneous
  contents:
  - infer
  - gss
  - deprecated
  - rep_sample_n
  - "`%>%`"
  - print.infer

articles:
- title: Articles
  navbar:
  contents:
  - infer
  - t_test
  - anova
  - chi_squared
  - paired
  - observed_stat_examples

development:
  mode: auto


================================================
FILE: air.toml
================================================


================================================
FILE: codecov.yml
================================================
comment: false

coverage:
  status:
    project:
      default:
        target: auto
        threshold: 1%
        informational: true
    patch:
      default:
        target: auto
        threshold: 1%
        informational: true


================================================
FILE: data-raw/save_gss.R
================================================
library(dplyr)
library(forcats)
library(srvyr)
library(ggplot2)

# pull gss data
temp <- tempfile()
download.file("https://gss.norc.org/documents/stata/GSS_stata.zip", temp)

# if this next line errors with "No such file or directory", try
# incrementing the number after "_R"
gss_orig <- haven::read_dta(unz(temp, filename = "GSS7218_R2.DTA")) |>
  haven::as_factor()
unlink(temp)

# select relevant columns
gss_small <- gss_orig |>
  filter(!stringr::str_detect(sample, "blk oversamp")) |> # this is for weighting
  select(
    year,
    age,
    sex,
    college = degree,
    partyid,
    hompop,
    hours = hrs1,
    income,
    class,
    finrela,
    weight = wtssall
  ) |>
  mutate_if(
    is.factor,
    ~ fct_collapse(., NULL = c("IAP", "NA", "iap", "na"))
  ) |>
  mutate(
    age = age |>
      fct_recode("89" = "89 or older", NULL = "DK") |> # truncated at 89
      as.character() |>
      as.numeric(),
    hompop = hompop |>
      fct_collapse(NULL = c("DK")) |>
      as.character() |>
      as.numeric(),
    hours = hours |>
      fct_recode("89" = "89+ hrs", NULL = "DK") |> # truncated at 89
      as.character() |>
      as.numeric(),
    weight = weight |>
      as.character() |>
      as.numeric(),
    partyid = fct_collapse(
      partyid,
      dem = c("strong democrat", "not str democrat"),
      rep = c("strong republican", "not str republican"),
      ind = c("ind,near dem", "independent", "ind,near rep"),
      other = "other party"
    ),
    income = factor(income, ordered = TRUE),
    college = fct_collapse(
      college,
      degree = c("junior college", "bachelor", "graduate"),
      "no degree" = c("lt high school", "high school"),
      NULL = "dk" # no dks show up in the data, so drop this level
    )
  )

# sample 3k rows, first dropping NAs
set.seed(20200201)
gss <- gss_small |>
  drop_na() |>
  sample_n(500)

# check that the sample is similar unweighted to weighted
gss_wt <- srvyr::as_survey_design(gss, weights = weight)

unweighted <- gss |>
  group_by(year, sex, partyid) |>
  summarize(n = n()) |>
  ungroup() |>
  group_by(year, sex) |>
  mutate(prop = n / sum(n))

weighted <- gss_wt |>
  group_by(year, sex, partyid) |>
  summarize(prop = srvyr::survey_mean())

# save data into package
usethis::use_data(gss, overwrite = TRUE)

devtools::document()


================================================
FILE: figs/paper/apa.csl
================================================
<?xml version="1.0" encoding="utf-8"?>
<style xmlns="http://purl.org/net/xbiblio/csl" class="in-text" version="1.0" demote-non-dropping-particle="never">
  <info>
    <title>American Psychological Association 6th Edition</title>
    <id>http://www.zotero.org/styles/apa</id>
    <link href="http://www.zotero.org/styles/apa" rel="self"/>
    <link href="http://owl.english.purdue.edu/owl/resource/560/01/" rel="documentation"/>
    <author>
      <name>Simon Kornblith</name>
      <email>simon@simonster.com</email>
    </author>
    <contributor>
      <name>Bruce D'Arcus</name>
    </contributor>
    <contributor>
      <name>Curtis M. Humphrey</name>
    </contributor>
    <contributor>
      <name>Richard Karnesky</name>
      <email>karnesky+zotero@gmail.com</email>
      <uri>http://arc.nucapt.northwestern.edu/Richard_Karnesky</uri>
    </contributor>
    <contributor>
      <name>Sebastian Karcher</name>
    </contributor>
    <category field="psychology"/>
    <category field="generic-base"/>
    <category citation-format="author-date"/>
    <updated>2010-01-27T20:08:03+00:00</updated>
    <rights>This work is licensed under a Creative Commons Attribution-ShareAlike 3.0 License: http://creativecommons.org/licenses/by-sa/3.0/</rights>
  </info>
  <locale xml:lang="en">
    <terms>
      <term name="translator" form="short">
        <single>trans.</single>
        <multiple>trans.</multiple>
      </term>
    </terms>
  </locale>
  <macro name="container-contributors">
    <choose>
      <if type="chapter paper-conference" match="any">
        <text term="in" text-case="capitalize-first" suffix=" "/>
        <names variable="editor" delimiter=", " suffix=", ">
          <name and="symbol" initialize-with=". " delimiter=", "/>
          <label form="short" prefix=" (" text-case="capitalize-first" suffix=")"/>
          <substitute>
            <names variable="translator"/>
          </substitute>
        </names>
      </if>
    </choose>
  </macro>
  <macro name="secondary-contributors">
    <choose>
      <if type="chapter paper-conference" match="none">
        <names variable="translator" delimiter=", " prefix=" (" suffix=")">
          <name and="symbol" initialize-with=". " delimiter=", "/>
          <label form="short" prefix=", " text-case="capitalize-first" suffix=""/>
          <substitute>
            <names variable="editor"/>
          </substitute>
        </names>
      </if>
    </choose>
  </macro>
  <macro name="author">
    <names variable="author">
      <name name-as-sort-order="all" and="symbol" sort-separator=", " initialize-with=". " delimiter=", " delimiter-precedes-last="always"/>
      <label form="short" prefix=" (" suffix=".)" text-case="capitalize-first" strip-periods="true"/>
      <substitute>
        <names variable="editor"/>
        <names variable="translator"/>
        <choose>
          <if type="report">
            <text variable="publisher"/>
            <text macro="title"/>
          </if>
          <else>
            <text macro="title"/>
          </else>
        </choose>
      </substitute>
    </names>
  </macro>
  <macro name="author-short">
    <names variable="author">
      <name form="short" and="symbol" delimiter=", " initialize-with=". "/>
      <substitute>
        <names variable="editor"/>
        <names variable="translator"/>
        <choose>
          <if type="report">
            <text variable="publisher"/>
            <text variable="title" form="short" font-style="italic"/>
          </if>
          <else-if type="bill book graphic legal_case legislation motion_picture song" match="any">
            <text variable="title" form="short" font-style="italic"/>
          </else-if>
          <else>
            <text variable="title" form="short" quotes="true"/>
          </else>
        </choose>
      </substitute>
    </names>
  </macro>
  <macro name="access">
    <choose>
      <if type="thesis">
        <choose>
          <if variable="archive" match="any">
            <group>
              <text term="retrieved" text-case="capitalize-first" suffix=" "/>
              <text term="from" suffix=" "/>
              <text variable="archive" suffix="."/>
              <text variable="archive_location" prefix=" (" suffix=")"/>
            </group>
          </if>
          <else>
            <group>
              <text term="retrieved" text-case="capitalize-first" suffix=" "/>
              <text term="from" suffix=" "/>
              <text variable="URL"/>
            </group>
          </else>
        </choose>
      </if>
      <else>
        <choose>
          <if variable="DOI">
            <text variable="DOI" prefix="doi:"/>
          </if>
          <else>
            <choose>
              <if type="webpage">
                <group delimiter=" ">
                  <text term="retrieved" text-case="capitalize-first" suffix=" "/>
                  <group>
                    <date variable="accessed" suffix=", ">
                      <date-part name="month" suffix=" "/>
                      <date-part name="day" suffix=", "/>
                      <date-part name="year"/>
                    </date>
                  </group>
                  <text term="from"/>
                  <text variable="URL"/>
                </group>
              </if>
              <else>
                <group>
                  <text term="retrieved" text-case="capitalize-first" suffix=" "/>
                  <text term="from" suffix=" "/>
                  <text variable="URL"/>
                </group>
              </else>
            </choose>
          </else>
        </choose>
      </else>
    </choose>
  </macro>
  <macro name="title">
    <choose>
      <if type="report thesis" match="any">
        <text variable="title" font-style="italic"/>
        <group prefix=" (" suffix=")" delimiter=" ">
          <text variable="genre"/>
          <text variable="number" prefix="No. "/>
        </group>
      </if>
      <else-if type="book graphic  motion_picture report song manuscript speech" match="any">
        <text variable="title" font-style="italic"/>
      </else-if>
      <else>
        <text variable="title"/>
      </else>
    </choose>
  </macro>
  <macro name="publisher">
    <choose>
      <if type="report" match="any">
        <group delimiter=": ">
          <text variable="publisher-place"/>
          <text variable="publisher"/>
        </group>
      </if>
      <else-if type="thesis" match="any">
        <group delimiter=", ">
          <text variable="publisher"/>
          <text variable="publisher-place"/>
        </group>
      </else-if>
      <else>
        <group delimiter=", ">
          <choose>
            <if variable="event" match="none">
              <text variable="genre"/>
            </if>
          </choose>
          <choose>
            <if type="article-journal article-magazine" match="none">
              <group delimiter=": ">
                <text variable="publisher-place"/>
                <text variable="publisher"/>
              </group>
            </if>
          </choose>
        </group>
      </else>
    </choose>
  </macro>
  <macro name="event">
    <choose>
      <if variable="event">
        <choose>
          <if variable="genre" match="none">
            <text term="presented at" text-case="capitalize-first" suffix=" "/>
            <text variable="event"/>
          </if>
          <else>
            <group delimiter=" ">
              <text variable="genre" text-case="capitalize-first"/>
              <text term="presented at"/>
              <text variable="event"/>
            </group>
          </else>
        </choose>
      </if>
    </choose>
  </macro>
  <macro name="issued">
    <choose>
      <if type="bill legal_case legislation" match="none">
        <choose>
          <if variable="issued">
            <group prefix=" (" suffix=").">
              <date variable="issued">
                <date-part name="year"/>
              </date>
              <text variable="year-suffix"/>
              <choose>
                <if type="article-journal bill book chapter graphic legal_case legislation motion_picture paper-conference report song" match="none">
                  <date variable="issued">
                    <date-part prefix=", " name="month"/>
                    <date-part prefix=" " name="day"/>
                  </date>
                </if>
              </choose>
            </group>
          </if>
          <else>
            <group prefix=" (" suffix=").">
              <text term="no date" form="short"/>
              <text variable="year-suffix" prefix="-"/>
            </group>
          </else>
        </choose>
      </if>
    </choose>
  </macro>
  <macro name="issued-sort">
    <choose>
      <if type="article-journal bill book chapter graphic legal_case legislation motion_picture paper-conference report song" match="none">
        <date variable="issued">
          <date-part name="year"/>
          <date-part name="month"/>
          <date-part name="day"/>
        </date>
      </if>
      <else>
        <date variable="issued">
          <date-part name="year"/>
        </date>
      </else>
    </choose>
  </macro>
  <macro name="issued-year">
    <choose>
      <if variable="issued">
        <date variable="issued">
          <date-part name="year"/>
        </date>
        <text variable="year-suffix"/>
      </if>
      <else>
        <text term="no date" form="short"/>
        <text variable="year-suffix" prefix="-"/>
      </else>
    </choose>
  </macro>
  <macro name="edition">
    <choose>
      <if is-numeric="edition">
        <group delimiter=" ">
          <number variable="edition" form="ordinal"/>
          <text term="edition" form="short" suffix="." strip-periods="true"/>
        </group>
      </if>
      <else>
        <text variable="edition" suffix="."/>
      </else>
    </choose>
  </macro>
  <macro name="locators">
    <choose>
      <if type="article-journal article-magazine" match="any">
        <group prefix=", " delimiter=", ">
          <group>
            <text variable="volume" font-style="italic"/>
            <text variable="issue" prefix="(" suffix=")"/>
          </group>
          <text variable="page"/>
        </group>
      </if>
      <else-if type="article-newspaper">
        <group delimiter=" " prefix=", ">
          <label variable="page" form="short"/>
          <text variable="page"/>
        </group>
      </else-if>
      <else-if type="book graphic motion_picture report song chapter paper-conference" match="any">
        <group prefix=" (" suffix=")" delimiter=", ">
          <text macro="edition"/>
          <group>
            <text term="volume" form="short" plural="true" text-case="capitalize-first" suffix=". " strip-periods="true"/>
            <number variable="number-of-volumes" form="numeric" prefix="1-"/>
          </group>
          <group>
            <text term="volume" form="short" text-case="capitalize-first" suffix=". " strip-periods="true"/>
            <number variable="volume" form="numeric"/>
          </group>
          <group>
            <label variable="page" form="short" suffix=" "/>
            <text variable="page"/>
          </group>
        </group>
      </else-if>
      <else-if type="legal_case">
        <group prefix=" (" suffix=")" delimiter=" ">
          <text variable="authority"/>
          <date variable="issued" delimiter=" ">
            <date-part name="month" form="short"/>
            <date-part name="day" suffix=","/>
            <date-part name="year"/>
          </date>
        </group>
      </else-if>
      <else-if type="bill legislation">
        <date variable="issued" prefix=" (" suffix=")">
          <date-part name="year"/>
        </date>
      </else-if>
    </choose>
  </macro>
  <macro name="citation-locator">
    <group>
      <label variable="locator" form="short"/>
      <text variable="locator" prefix=" "/>
    </group>
  </macro>
  <macro name="container">
    <choose>
      <if type="bill legal_case legislation" match="none">
        <text variable="container-title" font-style="italic"/>
      </if>
      <else>
        <group delimiter=" " prefix=", ">
          <choose>
            <if variable="container-title">
              <text variable="volume"/>
              <text variable="container-title"/>
              <group delimiter=" ">
                <!--change to label variable="section" as that becomes available -->
                <text term="section" form="symbol"/>
                <text variable="section"/>
              </group>
              <text variable="page"/>
            </if>
            <else>
              <choose>
                <if type="legal_case">
                  <text variable="number" prefix="No. "/>
                </if>
                <else>
                  <text variable="number" prefix="Pub. L. No. "/>
                  <group delimiter=" ">
                    <!--change to label variable="section" as that becomes available -->
                    <text term="section" form="symbol"/>
                    <text variable="section"/>
                  </group>
                </else>
              </choose>
            </else>
          </choose>
        </group>
      </else>
    </choose>
  </macro>
  <citation et-al-min="6" et-al-use-first="1" et-al-subsequent-min="3" et-al-subsequent-use-first="1" disambiguate-add-year-suffix="true" disambiguate-add-names="true" disambiguate-add-givenname="true" collapse="year" givenname-disambiguation-rule="primary-name">
    <sort>
      <key macro="author"/>
      <key macro="issued-sort"/>
    </sort>
    <layout prefix="(" suffix=")" delimiter="; ">
      <group delimiter=", ">
        <text macro="author-short"/>
        <text macro="issued-year"/>
        <text macro="citation-locator"/>
      </group>
    </layout>
  </citation>
  <bibliography hanging-indent="true" et-al-min="8" et-al-use-first="7" entry-spacing="0" line-spacing="2">
    <sort>
      <key macro="author"/>
      <key macro="issued-sort" sort="ascending"/>
    </sort>
    <layout>
      <group suffix=".">
        <group delimiter=". ">
          <text macro="author"/>
          <text macro="issued"/>
        </group>
        <group delimiter=". ">
          <text macro="title" prefix=" "/>
          <group>
            <text macro="container-contributors"/>
            <text macro="secondary-contributors"/>
            <group delimiter=", ">
              <text macro="container"/>
              <text variable="collection-title"/>
            </group>
          </group>
        </group>
        <text macro="locators"/>
        <group delimiter=", " prefix=". ">
          <text macro="event"/>
          <text macro="publisher"/>
        </group>
      </group>
      <text macro="access" prefix=" "/>
    </layout>
  </bibliography>
</style>


================================================
FILE: figs/paper/columns.tex
================================================
\newenvironment{cols}[1][]{}{}

\newenvironment{col}[1]{\begin{minipage}{#1}\ignorespaces}{%
\end{minipage}
\ifhmode\unskip\fi
\aftergroup\useignorespacesandallpars}

\def\useignorespacesandallpars#1\ignorespaces\fi{%
#1\fi\ignorespacesandallpars}

\makeatletter
\def\ignorespacesandallpars{%
  \@ifnextchar\par
    {\expandafter\ignorespacesandallpars\@gobble}%
    {}%
}
\makeatother

================================================
FILE: figs/paper/paper.Rmd
================================================
---
title: 'infer: An R package for tidyverse-friendly statistical inference'
tags:
  - data science
  - tidyverse
  - inference
  - R
authors:
- name: Simon P. Couch
  orcid: 0000-0001-5676-5107
  affiliation: "1, 2"
- name: Andrew P. Bray
  orcid: 0000-0002-4037-7414
  affiliation: 3
- name: Chester Ismay
  orcid: 0000-0003-2820-2547
  affiliation: 4
- name: Evgeni Chasnovski
  orcid: 0000-0002-1617-4019
  affiliation: 5
- name: Benjamin S. Baumer
  orcid: 0000-0002-3279-0516
  affiliation: 6
- name: Mine Çetinkaya-Rundel
  orcid: 0000-0001-6452-2420
  affiliation: "2, 7"
affiliations:
 - name: Johns Hopkins, Department of Biostatistics
   index: 1
 - name: RStudio
   index: 2
 - name: UC Berkeley, Department of Statistics and Reed College Mathematics Department (on leave)
   index: 3
 - name: Flatiron School
   index: 4
 - name: No Affiliation
   index: 5
 - name: Smith College, Program in Statistical & Data Sciences
   index: 6
 - name: Duke University, Department of Statistical Science
   index: 7

citation_author: Couch et. al.
date: 12 June 2021
year: 2021
bibliography: paper.bib
output: 
  rticles::joss_article:
    keep_tex: true
    includes:
      in_header: columns.tex
csl: apa.csl
journal: JOSS
---

# Summary

`infer` implements an expressive grammar to perform statistical inference that adheres to the `tidyverse` design framework [@wickham2019welcome]. Rather than providing methods for specific statistical tests, this package consolidates the principles that are shared among common hypothesis tests and confidence intervals into a set of four main verbs (functions), supplemented with many utilities to visualize and extract value from their outputs.

# Statement of Need

Packages implementing methods for basic statistical inference in R are highly variable in their interfaces. The structure of inputted data, argument names, expected argument types, argument orders, output types, and spelling cases varies widely both within and among packages. This diversity in approaches obscures the intuition shared among common inferential procedures, makes details of usage difficult to remember, and prevents an expressive and idiomatic coding style.

`infer` is an R package for randomization-based hypothesis testing, naturalizing an intuitive understanding of statistical inference via a unified and expressive grammar. Four functions provide functionality encompassing a large swath of basic frequentist statistical inference, abstracting away details of specific tests and shifting the focus of the analyst to the observed data and the processes that generated it. Such a grammar lends itself to applications in teaching, data pedagogy research, applied scientific research, and advanced predictive modeling. For one, the principled approach of the `infer` package has made it an especially good fit for teaching introductory statistics and data science [@ismay2019statistical; @baumer2020teaching; @cetinkaya2021fresh] and research in data pedagogy [@fergusson2021introducing; @loy2021bringing]. Further, the package has already seen usage in a number of published scientific applications [@mclean2021controlled; @ask2021per; @fallon2021single]. Finally, the package integrates with the greater tidymodels collection of packages, a burgeoning software ecosystem for tidyverse-aligned predictive modeling used across many modern research and industrial applications [@kuhn2020tidymodels]. To date, the package has been downloaded more than 400,000 times.

# Underlying Principles

Regardless of the hypothesis test in question, an analyst asks the same kind of question when conducting statistical inference: is the effect/difference in the observed data real, or due to random chance? To answer this question, the analyst begins by assuming that the effect in the observed data was simply due to random chance, and calls this assumption the *null hypothesis*. (In reality, they might not believe in the null hypothesis at all---the null hypothesis is in opposition to the *alternate hypothesis*, which supposes that the effect present in the observed data is actually due to the fact that "something is going on.") The analyst then calculates a *test statistic* from the data that describes the observed effect. They can use this test statistic to calculate a *p-value* via juxtaposition with a *null distribution*, giving the probability that the observed data could come about if the null hypothesis were true. If this probability is below some pre-defined *significance level* $\alpha$, then the analyst can reject the null hypothesis.

The workflow of this package is designed around this idea. Starting out with some dataset,

+ `specify()` allows the analyst to specify the variable, or relationship between variables, that they are interested in.
+ `hypothesize()` allows the analyst to declare the null hypothesis.
+ `generate()` allows the analyst to generate data reflecting the null hypothesis or using the bootstrap.
+ `calculate()` allows the analyst to calculate summary statistics, either from
     * the observed data, to form the observed test statistic.
     * data `generate()`d to reflect the null hypothesis, to form a randomization-based null distribution of test statistics.

As such, the ultimate output of an infer pipeline using these four functions is generally an _observed statistic_ or _null distribution_ of test statistics. These four functions are thus supplemented with several utilities to visualize and extract value from their outputs.

+ `visualize()` plots the null distribution of test statistics.
     * `shade_p_value()` situates the observed statistic in the null distribution, shading the region as or more extreme.
+ `get_p_value()` calculates a p-value via the juxtaposition of the test statistic and the null distribution.

The workflow outlined above can also be used for constructing confidence intervals via bootstrapping with the omission of the `hypothesize()` step in the pipeline. The resulting bootstrap distribution can then be visualized with `visualize()`, the confidence interval region can be situated in the bootstrap distribution with `shade_confidence_interval()`, and the bounds of the confidence interval can be calculated with `get_confidence_interval()`.

Beyond this, the `infer` package offers:

* methods for inference using theory-based distributions
* shorthand wrappers for common statistical tests using tidy data
* model-fitting workflows to accommodate multiple explanatory variables

# Comparison to Other Packages

Several software packages on the Comprehensive R Archive Network share functionality with `infer` [@CRAN]. `broom` and `parameters` convert model objects to unified output formats, though they do not provide methods for fitting models, describing null distributions, performing bootstrapping, or calculating summary statistics from tabular data [@r-broom; @r-parameters]. `statsExpressions`, and adjacent packages in the `easystats` ecosystem, implement wrappers with consistent interfaces for theory-based hypothesis tests [@r-statsExpressions]. Similarly, `mosaic` is a package used to teach statistics by unifying summary statistics, visualization, and modeling with a consistent API built around R's formula interface. The `mosaic` package also includes functionality to conduct randomization-based inference [@r-mosaic]. At a higher level, though, the structure of each of these packages is defined by model types and statistics, where each model type or statistic has its own associated function and/or object class. In contrast, `infer` is structured around four functions, situating statistics and model types within a more abstracted grammar.^[This grammar follows from Allen Downey's "there is only one test" framework [@downey2016].] 

# Acknowledgements

We acknowledge contributions from Albert Y. Kim, Jo Hardin, Jay Lee, Amelia McNamara, Nick Solomon, and Richie Cotton.

# References


================================================
FILE: figs/paper/paper.bib
================================================
@book{ismay2019statistical,
  title={Statistical Inference via Data Science: A ModernDive into {R} and the Tidyverse},
  author={Ismay, Chester and Kim, Albert Y},
  year={2019},
  publisher={CRC Press},
  doi="10.1080/00224065.2020.1848366"
}

@article{baumer2020teaching,
  title={Teaching Introductory Statistics with DataCamp},
  author={Baumer, Benjamin S and Bray, Andrew P and {\c{C}}etinkaya-Rundel, Mine and Hardin, Johanna S},
  journal={Journal of Statistics Education},
  volume={28},
  number={1},
  pages={89--97},
  year={2020},
  publisher={Taylor \& Francis},
  doi="10.1080/10691898.2020.1730734"
}

@article{cetinkaya2021fresh,
  title={A fresh look at introductory data science},
  author={{\c{C}}etinkaya-Rundel, Mine and Ellison, Victoria},
  journal={Journal of Statistics and Data Science Education},
  volume={29},
  number={sup1},
  pages={S16--S26},
  year={2021},
  publisher={Taylor \& Francis},
  doi="10.1080/10691898.2020.1804497"
}

@article{fergusson2021introducing,
  title={Introducing teachers who use GUI-driven tools for the randomization test to code-driven tools},
  author={Fergusson, Anna and Pfannkuch, Maxine},
  journal={Mathematical Thinking and Learning},
  pages={1--21},
  year={2021},
  publisher={Taylor \& Francis},
  doi="10.1080/10986065.2021.1922856"
}

@article{loy2021bringing,
  title={Bringing Visual Inference to the Classroom},
  author={Loy, Adam},
  journal={Journal of Statistics and Data Science Education},
  pages={1--12},
  year={2021},
  publisher={Taylor \& Francis},
  doi="10.1080/26939169.2021.1920866"
}

@article{mclean2021controlled,
  title={Controlled Cytoplast Arrest and Morula Aggregation Enhance Development, Cryoresilience, and In Vivo Survival of Cloned Sheep Embryos},
  author={McLean, Zachariah Louis and Appleby, Sarah Jane and Fermin, Lisanne Monique and Henderson, Harold Victor and Wei, Jingwei and Wells, David Norman and Oback, Bj{\"o}rn},
  journal={Cellular Reprogramming},
  volume={23},
  number={1},
  pages={14--25},
  year={2021},
  publisher={Mary Ann Liebert, Inc., publishers 140 Huguenot Street, 3rd Floor New~…},
  doi="10.1089/cell.2020.0078"
}

@article{ask2021per,
  title={Per-and Polyfluoroalkyl Substances Are Positively Associated with Thyroid Hormones in an Arctic Seabird},
  author={Ask, Amalie Vigdel and Jenssen, Bj{\o}rn Munro and Tartu, Sabrina and Angelier, Fr{\'e}d{\'e}ric and Chastel, Olivier and Gabrielsen, Geir Wing},
  journal={Environmental Toxicology and Chemistry},
  volume={40},
  number={3},
  pages={820--831},
  year={2021},
  publisher={Wiley Online Library},
  doi="10.1002/etc.4978"
}

@article{fallon2021single,
  title={Single cell morphological metrics and cytoskeletal alignment regulate VCAM-1 protein expression},
  author={Fallon, Meghan E and Hinds, Monica T},
  journal={Biochemical and Biophysical Research Communications},
  volume={555},
  pages={160--167},
  year={2021},
  publisher={Elsevier},
  doi="10.1016/j.bbrc.2021.03.129"
}

@article{kuhn2020tidymodels,
  title={{tidymodels}: a collection of packages for modeling and machine learning using tidyverse principles},
  author={Kuhn, M and Wickham, H},
  journal={Boston, MA, USA.},
  year={2020},
  url={https://tidymodels.org}
}

@article{wickham2019welcome,
  title={Welcome to the Tidyverse},
  author={Wickham, Hadley and Averick, Mara and Bryan, Jennifer and Chang, Winston and McGowan, Lucy D'Agostino and Fran{\c{c}}ois, Romain and Grolemund, Garrett and Hayes, Alex and Henry, Lionel and Hester, Jim and others},
  journal={Journal of Open Source Software},
  volume={4},
  number={43},
  pages={1686},
  year={2019},
  doi="10.21105/joss.01686"
}

@Manual{CRAN,
  title        = {R: A Language and Environment for Statistical
                  Computing},
  author       = {{R Core Team}},
  organization = {R Foundation for Statistical Computing},
  address      = {Vienna, Austria},
  year         = 2021,
  url = {https://www.R-project.org/}
}

@article{downey2016,
  title={There is still only one test},
  author={Downey, Allen B},
  year={2016},
  publisher={"Probably Overthinking It"},
  url = {http://allendowney.blogspot.com/2016/06/there-is-still-only-one-test.html}
}

@Article{r-mosaic,
  author = {Randall Pruim and Daniel T Kaplan and Nicholas J Horton},
  title = {The mosaic Package: Helping Students to `Think with Data' Using {R}},
  journal = {The R Journal},
  volume = {9},
  number = {1},
  pages = {77--102},
  year = {2017},
  doi = {10.32614/RJ-2017-024},
}

@Article{r-statsExpressions,
  doi = {10.21105/joss.03236},
  url = {https://doi.org/10.21105/joss.03236},
  year = {2021},
  publisher = {{The Open Journal}},
  volume = {6},
  number = {61},
  pages = {3236},
  author = {Indrajeet Patil},
  title = {{statsExpressions: {R} Package for Tidy Dataframes and Expressions with Statistical Details}},
  journal = {{Journal of Open Source Software}},
}

@Article{r-parameters,
  title = {Extracting, Computing and Exploring the Parameters of Statistical Models using {R}.},
  volume = {5},
  doi = {10.21105/joss.02445},
  number = {53},
  journal = {Journal of Open Source Software},
  author = {Daniel Lüdecke and Mattan S. Ben-Shachar and Indrajeet Patil and Dominique Makowski},
  year = {2020},
  pages = {2445},
}
  
@Manual{r-broom,
  title = {{broom}: Convert Statistical Objects into Tidy Tibbles},
  author = {David Robinson and Alex Hayes and Simon Couch},
  year = {2021},
  note = {R package version 0.7.9},
  url = {https://CRAN.R-project.org/package=broom},
}


================================================
FILE: figs/paper/paper.log
================================================
This is XeTeX, Version 3.14159265-2.6-0.99999 (TeX Live 2018) (preloaded format=xelatex 2018.4.16)  15 SEP 2021 07:48
entering extended mode
 restricted \write18 enabled.
 %&-line parsing enabled.
**paper.tex
(./paper.tex
LaTeX2e <2018-04-01> patch level 2
Babel <3.18> and hyphenation patterns for 84 language(s) loaded.
(/usr/local/texlive/2018/texmf-dist/tex/latex/base/article.cls
Document Class: article 2014/09/29 v1.4h Standard LaTeX document class
(/usr/local/texlive/2018/texmf-dist/tex/latex/base/size10.clo
File: size10.clo 2014/09/29 v1.4h Standard LaTeX file (size option)
)
\c@part=\count80
\c@section=\count81
\c@subsection=\count82
\c@subsubsection=\count83
\c@paragraph=\count84
\c@subparagraph=\count85
\c@figure=\count86
\c@table=\count87
\abovecaptionskip=\skip41
\belowcaptionskip=\skip42
\bibindent=\dimen102
) (/usr/local/texlive/2018/texmf-dist/tex/latex/marginnote/marginnote.sty
Package: marginnote 2017/04/22 v1.2b non floating margin notes for LaTeX
\c@mn@abspage=\count88
) (/usr/local/texlive/2018/texmf-dist/tex/latex/graphics/graphicx.sty
Package: graphicx 2017/06/01 v1.1a Enhanced LaTeX Graphics (DPC,SPQR)
(/usr/local/texlive/2018/texmf-dist/tex/latex/graphics/keyval.sty
Package: keyval 2014/10/28 v1.15 key=value parser (DPC)
\KV@toks@=\toks14
) (/usr/local/texlive/2018/texmf-dist/tex/latex/graphics/graphics.sty
Package: graphics 2017/06/25 v1.2c Standard LaTeX Graphics (DPC,SPQR)
(/usr/local/texlive/2018/texmf-dist/tex/latex/graphics/trig.sty
Package: trig 2016/01/03 v1.10 sin cos tan (DPC)
) (/usr/local/texlive/2018/texmf-dist/tex/latex/graphics-cfg/graphics.cfg
File: graphics.cfg 2016/06/04 v1.11 sample graphics configuration
)
Package graphics Info: Driver file: xetex.def on input line 99.
(/usr/local/texlive/2018/texmf-dist/tex/latex/graphics-def/xetex.def
File: xetex.def 2017/06/24 v5.0h Graphics/color driver for xetex
))
\Gin@req@height=\dimen103
\Gin@req@width=\dimen104
) (/usr/local/texlive/2018/texmf-dist/tex/latex/xcolor/xcolor.sty
Package: xcolor 2016/05/11 v2.12 LaTeX color extensions (UK)
(/usr/local/texlive/2018/texmf-dist/tex/latex/graphics-cfg/color.cfg
File: color.cfg 2016/01/02 v1.6 sample color configuration
)
Package xcolor Info: Driver file: xetex.def on input line 225.
Package xcolor Info: Model `cmy' substituted by `cmy0' on input line 1348.
Package xcolor Info: Model `RGB' extended on input line 1364.
Package xcolor Info: Model `HTML' substituted by `rgb' on input line 1366.
Package xcolor Info: Model `Hsb' substituted by `hsb' on input line 1367.
Package xcolor Info: Model `tHsb' substituted by `hsb' on input line 1368.
Package xcolor Info: Model `HSB' substituted by `hsb' on input line 1369.
Package xcolor Info: Model `Gray' substituted by `gray' on input line 1370.
Package xcolor Info: Model `wave' substituted by `hsb' on input line 1371.
) (/usr/local/texlive/2018/texmf-dist/tex/latex/preprint/authblk.sty
Package: authblk 2001/02/27 1.3 (PWD)
\affilsep=\skip43
\@affilsep=\skip44
\c@Maxaffil=\count89
\c@authors=\count90
\c@affil=\count91
) (/usr/local/texlive/2018/texmf-dist/tex/latex/etoolbox/etoolbox.sty
Package: etoolbox 2018/02/11 v2.5e e-TeX tools for LaTeX (JAW)
\etb@tempcnta=\count92
) (/usr/local/texlive/2018/texmf-dist/tex/latex/titlesec/titlesec.sty
Package: titlesec 2016/03/21 v2.10.2 Sectioning titles
\ttl@box=\box26
\beforetitleunit=\skip45
\aftertitleunit=\skip46
\ttl@plus=\dimen105
\ttl@minus=\dimen106
\ttl@toksa=\toks15
\titlewidth=\dimen107
\titlewidthlast=\dimen108
\titlewidthfirst=\dimen109
) (/usr/local/texlive/2018/texmf-dist/tex/latex/tools/calc.sty
Package: calc 2017/05/25 v4.3 Infix arithmetic (KKT,FJ)
\calc@Acount=\count93
\calc@Bcount=\count94
\calc@Adimen=\dimen110
\calc@Bdimen=\dimen111
\calc@Askip=\skip47
\calc@Bskip=\skip48
LaTeX Info: Redefining \setlength on input line 80.
LaTeX Info: Redefining \addtolength on input line 81.
\calc@Ccount=\count95
\calc@Cskip=\skip49
) (/usr/local/texlive/2018/texmf-dist/tex/latex/pgf/frontendlayer/tikz.sty (/us
r/local/texlive/2018/texmf-dist/tex/latex/pgf/basiclayer/pgf.sty (/usr/local/te
xlive/2018/texmf-dist/tex/latex/pgf/utilities/pgfrcs.sty
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/utilities/pgfutil-common.te
x
\pgfutil@everybye=\toks16
\pgfutil@tempdima=\dimen112
\pgfutil@tempdimb=\dimen113

(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/utilities/pgfutil-common-li
sts.tex))
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/utilities/pgfutil-latex.def
\pgfutil@abb=\box27
(/usr/local/texlive/2018/texmf-dist/tex/latex/ms/everyshi.sty
Package: everyshi 2001/05/15 v3.00 EveryShipout Package (MS)
)) (/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/utilities/pgfrcs.code.te
x
Package: pgfrcs 2015/08/07 v3.0.1a (rcs-revision 1.31)
))
Package: pgf 2015/08/07 v3.0.1a (rcs-revision 1.15)
(/usr/local/texlive/2018/texmf-dist/tex/latex/pgf/basiclayer/pgfcore.sty (/usr/
local/texlive/2018/texmf-dist/tex/latex/pgf/systemlayer/pgfsys.sty
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/systemlayer/pgfsys.code.tex
Package: pgfsys 2014/07/09 v3.0.1a (rcs-revision 1.48)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex
\pgfkeys@pathtoks=\toks17
\pgfkeys@temptoks=\toks18

(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/utilities/pgfkeysfiltered.c
ode.tex
\pgfkeys@tmptoks=\toks19
))
\pgf@x=\dimen114
\pgf@y=\dimen115
\pgf@xa=\dimen116
\pgf@ya=\dimen117
\pgf@xb=\dimen118
\pgf@yb=\dimen119
\pgf@xc=\dimen120
\pgf@yc=\dimen121
\w@pgf@writea=\write3
\r@pgf@reada=\read1
\c@pgf@counta=\count96
\c@pgf@countb=\count97
\c@pgf@countc=\count98
\c@pgf@countd=\count99
\t@pgf@toka=\toks20
\t@pgf@tokb=\toks21
\t@pgf@tokc=\toks22
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/systemlayer/pgf.cfg
File: pgf.cfg 2008/05/14  (rcs-revision 1.7)
)
Driver file for pgf: pgfsys-xetex.def

(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-xetex.de
f
File: pgfsys-xetex.def 2014/07/09  (rcs-revision 1.11)

(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-dvipdfmx
.def
File: pgfsys-dvipdfmx.def 2014/07/09  (rcs-revision 1.14)

(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-common-p
df.def
File: pgfsys-common-pdf.def 2013/10/10  (rcs-revision 1.13)
)
\pgfsys@objnum=\count100
)))
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/systemlayer/pgfsyssoftpath.
code.tex
File: pgfsyssoftpath.code.tex 2013/09/09  (rcs-revision 1.9)
\pgfsyssoftpath@smallbuffer@items=\count101
\pgfsyssoftpath@bigbuffer@items=\count102
)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/systemlayer/pgfsysprotocol.
code.tex
File: pgfsysprotocol.code.tex 2006/10/16  (rcs-revision 1.4)
))
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/basiclayer/pgfcore.code.tex
Package: pgfcore 2010/04/11 v3.0.1a (rcs-revision 1.7)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex (/usr
/local/texlive/2018/texmf-dist/tex/generic/pgf/math/pgfmathcalc.code.tex (/usr/
local/texlive/2018/texmf-dist/tex/generic/pgf/math/pgfmathutil.code.tex)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/math/pgfmathparser.code.tex
\pgfmath@dimen=\dimen122
\pgfmath@count=\count103
\pgfmath@box=\box28
\pgfmath@toks=\toks23
\pgfmath@stack@operand=\toks24
\pgfmath@stack@operation=\toks25
)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.code.
tex
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.basic
.code.tex)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.trigo
nometric.code.tex)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.rando
m.code.tex)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.compa
rison.code.tex)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.base.
code.tex)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.round
.code.tex)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.misc.
code.tex)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.integ
erarithmetics.code.tex))) (/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/m
ath/pgfmathfloat.code.tex
\c@pgfmathroundto@lastzeros=\count104
))
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepoints.co
de.tex
File: pgfcorepoints.code.tex 2013/10/07  (rcs-revision 1.27)
\pgf@picminx=\dimen123
\pgf@picmaxx=\dimen124
\pgf@picminy=\dimen125
\pgf@picmaxy=\dimen126
\pgf@pathminx=\dimen127
\pgf@pathmaxx=\dimen128
\pgf@pathminy=\dimen129
\pgf@pathmaxy=\dimen130
\pgf@xx=\dimen131
\pgf@xy=\dimen132
\pgf@yx=\dimen133
\pgf@yy=\dimen134
\pgf@zx=\dimen135
\pgf@zy=\dimen136
)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathconst
ruct.code.tex
File: pgfcorepathconstruct.code.tex 2013/10/07  (rcs-revision 1.29)
\pgf@path@lastx=\dimen137
\pgf@path@lasty=\dimen138
)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathusage
.code.tex
File: pgfcorepathusage.code.tex 2014/11/02  (rcs-revision 1.24)
\pgf@shorten@end@additional=\dimen139
\pgf@shorten@start@additional=\dimen140
)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/basiclayer/pgfcorescopes.co
de.tex
File: pgfcorescopes.code.tex 2015/05/08  (rcs-revision 1.46)
\pgfpic=\box29
\pgf@hbox=\box30
\pgf@layerbox@main=\box31
\pgf@picture@serial@count=\count105
)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/basiclayer/pgfcoregraphicst
ate.code.tex
File: pgfcoregraphicstate.code.tex 2014/11/02  (rcs-revision 1.12)
\pgflinewidth=\dimen141
)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransform
ations.code.tex
File: pgfcoretransformations.code.tex 2015/08/07  (rcs-revision 1.20)
\pgf@pt@x=\dimen142
\pgf@pt@y=\dimen143
\pgf@pt@temp=\dimen144
)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/basiclayer/pgfcorequick.cod
e.tex
File: pgfcorequick.code.tex 2008/10/09  (rcs-revision 1.3)
)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreobjects.c
ode.tex
File: pgfcoreobjects.code.tex 2006/10/11  (rcs-revision 1.2)
)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathproce
ssing.code.tex
File: pgfcorepathprocessing.code.tex 2013/09/09  (rcs-revision 1.9)
)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/basiclayer/pgfcorearrows.co
de.tex
File: pgfcorearrows.code.tex 2015/05/14  (rcs-revision 1.43)
\pgfarrowsep=\dimen145
)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreshade.cod
e.tex
File: pgfcoreshade.code.tex 2013/07/15  (rcs-revision 1.15)
\pgf@max=\dimen146
\pgf@sys@shading@range@num=\count106
)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreimage.cod
e.tex
File: pgfcoreimage.code.tex 2013/07/15  (rcs-revision 1.18)

(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreexternal.
code.tex
File: pgfcoreexternal.code.tex 2014/07/09  (rcs-revision 1.21)
\pgfexternal@startupbox=\box32
))
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/basiclayer/pgfcorelayers.co
de.tex
File: pgfcorelayers.code.tex 2013/07/18  (rcs-revision 1.7)
)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretranspare
ncy.code.tex
File: pgfcoretransparency.code.tex 2013/09/30  (rcs-revision 1.5)
)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepatterns.
code.tex
File: pgfcorepatterns.code.tex 2013/11/07  (rcs-revision 1.5)
)))
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/modules/pgfmoduleshapes.cod
e.tex
File: pgfmoduleshapes.code.tex 2014/03/21  (rcs-revision 1.35)
\pgfnodeparttextbox=\box33
)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/modules/pgfmoduleplot.code.
tex
File: pgfmoduleplot.code.tex 2015/08/03  (rcs-revision 1.13)
)
(/usr/local/texlive/2018/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version
-0-65.sty
Package: pgfcomp-version-0-65 2007/07/03 v3.0.1a (rcs-revision 1.7)
\pgf@nodesepstart=\dimen147
\pgf@nodesepend=\dimen148
)
(/usr/local/texlive/2018/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version
-1-18.sty
Package: pgfcomp-version-1-18 2007/07/23 v3.0.1a (rcs-revision 1.1)
)) (/usr/local/texlive/2018/texmf-dist/tex/latex/pgf/utilities/pgffor.sty (/usr
/local/texlive/2018/texmf-dist/tex/latex/pgf/utilities/pgfkeys.sty (/usr/local/
texlive/2018/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex)) (/usr/loca
l/texlive/2018/texmf-dist/tex/latex/pgf/math/pgfmath.sty (/usr/local/texlive/20
18/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex)) (/usr/local/texlive/2018/
texmf-dist/tex/generic/pgf/utilities/pgffor.code.tex
Package: pgffor 2013/12/13 v3.0.1a (rcs-revision 1.25)
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex)
\pgffor@iter=\dimen149
\pgffor@skip=\dimen150
\pgffor@stack=\toks26
\pgffor@toks=\toks27
))
(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/frontendlayer/tikz/tikz.cod
e.tex
Package: tikz 2015/08/07 v3.0.1a (rcs-revision 1.151)

(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/libraries/pgflibraryplothan
dlers.code.tex
File: pgflibraryplothandlers.code.tex 2013/08/31 v3.0.1a (rcs-revision 1.20)
\pgf@plot@mark@count=\count107
\pgfplotmarksize=\dimen151
)
\tikz@lastx=\dimen152
\tikz@lasty=\dimen153
\tikz@lastxsaved=\dimen154
\tikz@lastysaved=\dimen155
\tikzleveldistance=\dimen156
\tikzsiblingdistance=\dimen157
\tikz@figbox=\box34
\tikz@figbox@bg=\box35
\tikz@tempbox=\box36
\tikz@tempbox@bg=\box37
\tikztreelevel=\count108
\tikznumberofchildren=\count109
\tikznumberofcurrentchild=\count110
\tikz@fig@count=\count111

(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/modules/pgfmodulematrix.cod
e.tex
File: pgfmodulematrix.code.tex 2013/09/17  (rcs-revision 1.8)
\pgfmatrixcurrentrow=\count112
\pgfmatrixcurrentcolumn=\count113
\pgf@matrix@numberofcolumns=\count114
)
\tikz@expandcount=\count115

(/usr/local/texlive/2018/texmf-dist/tex/generic/pgf/frontendlayer/tikz/librarie
s/tikzlibrarytopaths.code.tex
File: tikzlibrarytopaths.code.tex 2008/06/17 v3.0.1a (rcs-revision 1.2)
))) (/usr/local/texlive/2018/texmf-dist/tex/latex/hyperref/hyperref.sty
Package: hyperref 2018/02/06 v6.86b Hypertext links for LaTeX
(/usr/local/texlive/2018/texmf-dist/tex/generic/oberdiek/hobsub-hyperref.sty
Package: hobsub-hyperref 2016/05/16 v1.14 Bundle oberdiek, subset hyperref (HO)

(/usr/local/texlive/2018/texmf-dist/tex/generic/oberdiek/hobsub-generic.sty
Package: hobsub-generic 2016/05/16 v1.14 Bundle oberdiek, subset generic (HO)
Package: hobsub 2016/05/16 v1.14 Construct package bundles (HO)
Package: infwarerr 2016/05/16 v1.4 Providing info/warning/error messages (HO)
Package: ltxcmds 2016/05/16 v1.23 LaTeX kernel commands for general use (HO)
Package: ifluatex 2016/05/16 v1.4 Provides the ifluatex switch (HO)
Package ifluatex Info: LuaTeX not detected.
Package: ifvtex 2016/05/16 v1.6 Detect VTeX and its facilities (HO)
Package ifvtex Info: VTeX not detected.
Package: intcalc 2016/05/16 v1.2 Expandable calculations with integers (HO)
Package: ifpdf 2017/03/15 v3.2 Provides the ifpdf switch
Package: etexcmds 2016/05/16 v1.6 Avoid name clashes with e-TeX commands (HO)
Package etexcmds Info: Could not find \expanded.
(etexcmds)             That can mean that you are not using pdfTeX 1.50 or
(etexcmds)             that some package has redefined \expanded.
(etexcmds)             In the latter case, load this package earlier.
Package: kvsetkeys 2016/05/16 v1.17 Key value parser (HO)
Package: kvdefinekeys 2016/05/16 v1.4 Define keys (HO)
Package: pdftexcmds 2018/01/30 v0.27 Utility functions of pdfTeX for LuaTeX (HO
)
Package pdftexcmds Info: LuaTeX not detected.
Package pdftexcmds Info: pdfTeX >= 1.30 not detected.
Package pdftexcmds Info: \pdf@primitive is available.
Package pdftexcmds Info: \pdf@ifprimitive is available.
Package pdftexcmds Info: \pdfdraftmode not found.
Package: pdfescape 2016/05/16 v1.14 Implements pdfTeX's escape features (HO)
Package: bigintcalc 2016/05/16 v1.4 Expandable calculations on big integers (HO
)
Package: bitset 2016/05/16 v1.2 Handle bit-vector datatype (HO)
Package: uniquecounter 2016/05/16 v1.3 Provide unlimited unique counter (HO)
)
Package hobsub Info: Skipping package `hobsub' (already loaded).
Package: letltxmacro 2016/05/16 v1.5 Let assignment for LaTeX macros (HO)
Package: hopatch 2016/05/16 v1.3 Wrapper for package hooks (HO)
Package: xcolor-patch 2016/05/16 xcolor patch
Package: atveryend 2016/05/16 v1.9 Hooks at the very end of document (HO)
Package: atbegshi 2016/06/09 v1.18 At begin shipout hook (HO)
Package: refcount 2016/05/16 v3.5 Data extraction from label references (HO)
Package: hycolor 2016/05/16 v1.8 Color options for hyperref/bookmark (HO)
) (/usr/local/texlive/2018/texmf-dist/tex/generic/ifxetex/ifxetex.sty
Package: ifxetex 2010/09/12 v0.6 Provides ifxetex conditional
) (/usr/local/texlive/2018/texmf-dist/tex/latex/oberdiek/auxhook.sty
Package: auxhook 2016/05/16 v1.4 Hooks for auxiliary files (HO)
) (/usr/local/texlive/2018/texmf-dist/tex/latex/oberdiek/kvoptions.sty
Package: kvoptions 2016/05/16 v3.12 Key value format for package options (HO)
)
\@linkdim=\dimen158
\Hy@linkcounter=\count116
\Hy@pagecounter=\count117
(/usr/local/texlive/2018/texmf-dist/tex/latex/hyperref/pd1enc.def
File: pd1enc.def 2018/02/06 v6.86b Hyperref: PDFDocEncoding definition (HO)
)
\Hy@SavedSpaceFactor=\count118
(/usr/local/texlive/2018/texmf-dist/tex/latex/latexconfig/hyperref.cfg
File: hyperref.cfg 2002/06/06 v1.2 hyperref configuration of TeXLive
)
Package hyperref Info: Hyper figures OFF on input line 4509.
Package hyperref Info: Link nesting OFF on input line 4514.
Package hyperref Info: Hyper index ON on input line 4517.
Package hyperref Info: Plain pages OFF on input line 4524.
Package hyperref Info: Backreferencing OFF on input line 4529.
Package hyperref Info: Implicit mode ON; LaTeX internals redefined.
Package hyperref Info: Bookmarks ON on input line 4762.
\c@Hy@tempcnt=\count119
(/usr/local/texlive/2018/texmf-dist/tex/latex/url/url.sty
\Urlmuskip=\muskip10
Package: url 2013/09/16  ver 3.4  Verb mode for urls, etc.
)
LaTeX Info: Redefining \url on input line 5115.
\XeTeXLinkMargin=\dimen159
\Fld@menulength=\count120
\Field@Width=\dimen160
\Fld@charsize=\dimen161
Package hyperref Info: Hyper figures OFF on input line 6369.
Package hyperref Info: Link nesting OFF on input line 6374.
Package hyperref Info: Hyper index ON on input line 6377.
Package hyperref Info: backreferencing OFF on input line 6384.
Package hyperref Info: Link coloring OFF on input line 6389.
Package hyperref Info: Link coloring with OCG OFF on input line 6394.
Package hyperref Info: PDF/A mode OFF on input line 6399.
LaTeX Info: Redefining \ref on input line 6439.
LaTeX Info: Redefining \pageref on input line 6443.
\Hy@abspage=\count121
\c@Item=\count122
\c@Hfootnote=\count123
)
Package hyperref Info: Driver (autodetected): hxetex.
(/usr/local/texlive/2018/texmf-dist/tex/latex/hyperref/hxetex.def
File: hxetex.def 2018/02/06 v6.86b Hyperref driver for XeTeX
(/usr/local/texlive/2018/texmf-dist/tex/latex/hyperref/puenc.def
File: puenc.def 2018/02/06 v6.86b Hyperref: PDF Unicode definition (HO)
) (/usr/local/texlive/2018/texmf-dist/tex/generic/oberdiek/stringenc.sty
Package: stringenc 2016/05/16 v1.11 Convert strings between diff. encodings (HO
)
)
\pdfm@box=\box38
\c@Hy@AnnotLevel=\count124
\HyField@AnnotCount=\count125
\Fld@listcount=\count126
\c@bookmark@seq@number=\count127
(/usr/local/texlive/2018/texmf-dist/tex/latex/oberdiek/rerunfilecheck.sty
Package: rerunfilecheck 2016/05/16 v1.8 Rerun checks for auxiliary files (HO)
Package uniquecounter Info: New unique counter `rerunfilecheck' on input line 2
82.
)
\Hy@SectionHShift=\skip50
)
Package hyperref Info: Option `colorlinks' set `true' on input line 12.
Package hyperref Info: Option `breaklinks' set `true' on input line 12.
(/usr/local/texlive/2018/texmf-dist/tex/latex/caption/caption.sty
Package: caption 2016/02/21 v3.3-144 Customizing captions (AR)
(/usr/local/texlive/2018/texmf-dist/tex/latex/caption/caption3.sty
Package: caption3 2016/05/22 v1.7-166 caption3 kernel (AR)
Package caption3 Info: TeX engine: e-TeX on input line 67.
\captionmargin=\dimen162
\captionmargin@=\dimen163
\captionwidth=\dimen164
\caption@tempdima=\dimen165
\caption@indent=\dimen166
\caption@parindent=\dimen167
\caption@hangindent=\dimen168
)
\c@ContinuedFloat=\count128
Package caption Info: hyperref package is loaded.
) (/usr/local/texlive/2018/texmf-dist/tex/latex/tcolorbox/tcolorbox.sty
Package: tcolorbox 2018/03/22 version 4.13 text color boxes
(/usr/local/texlive/2018/texmf-dist/tex/latex/tools/verbatim.sty
Package: verbatim 2014/10/28 v1.5q LaTeX2e package for verbatim enhancements
\every@verbatim=\toks28
\verbatim@line=\toks29
\verbatim@in@stream=\read2
) (/usr/local/texlive/2018/texmf-dist/tex/latex/environ/environ.sty
Package: environ 2014/05/04 v0.3 A new way to define environments
(/usr/local/texlive/2018/texmf-dist/tex/latex/trimspaces/trimspaces.sty
Package: trimspaces 2009/09/17 v1.1 Trim spaces around a token list
)
\@envbody=\toks30
)
\tcb@titlebox=\box39
\tcb@upperbox=\box40
\tcb@lowerbox=\box41
\tcb@phantombox=\box42
\c@tcbbreakpart=\count129
\c@tcblayer=\count130
\tcolorbox@number=\count131
\tcb@temp=\box43
\tcb@temp=\box44
\tcb@temp=\box45
\tcb@temp=\box46
\tcb@out=\write4
\tcb@record@out=\write5
) (/usr/local/texlive/2018/texmf-dist/tex/latex/amsfonts/amssymb.sty
Package: amssymb 2013/01/14 v3.01 AMS font symbols
(/usr/local/texlive/2018/texmf-dist/tex/latex/amsfonts/amsfonts.sty
Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support
\@emptytoks=\toks31
\symAMSa=\mathgroup4
\symAMSb=\mathgroup5
LaTeX Font Info:    Overwriting math alphabet `\mathfrak' in version `bold'
(Font)                  U/euf/m/n --> U/euf/b/n on input line 106.
)) (/usr/local/texlive/2018/texmf-dist/tex/latex/amsmath/amsmath.sty
Package: amsmath 2017/09/02 v2.17a AMS math features
\@mathmargin=\skip51
For additional information on amsmath, use the `?' option.
(/usr/local/texlive/2018/texmf-dist/tex/latex/amsmath/amstext.sty
Package: amstext 2000/06/29 v2.01 AMS text
(/usr/local/texlive/2018/texmf-dist/tex/latex/amsmath/amsgen.sty
File: amsgen.sty 1999/11/30 v2.0 generic functions
\@emptytoks=\toks32
\ex@=\dimen169
)) (/usr/local/texlive/2018/texmf-dist/tex/latex/amsmath/amsbsy.sty
Package: amsbsy 1999/11/29 v1.2d Bold Symbols
\pmbraise@=\dimen170
) (/usr/local/texlive/2018/texmf-dist/tex/latex/amsmath/amsopn.sty
Package: amsopn 2016/03/08 v2.02 operator names
)
\inf@bad=\count132
LaTeX Info: Redefining \frac on input line 213.
\uproot@=\count133
\leftroot@=\count134
LaTeX Info: Redefining \overline on input line 375.
\classnum@=\count135
\DOTSCASE@=\count136
LaTeX Info: Redefining \ldots on input line 472.
LaTeX Info: Redefining \dots on input line 475.
LaTeX Info: Redefining \cdots on input line 596.
\Mathstrutbox@=\box47
\strutbox@=\box48
\big@size=\dimen171
LaTeX Font Info:    Redeclaring font encoding OML on input line 712.
LaTeX Font Info:    Redeclaring font encoding OMS on input line 713.
\macc@depth=\count137
\c@MaxMatrixCols=\count138
\dotsspace@=\muskip11
\c@parentequation=\count139
\dspbrk@lvl=\count140
\tag@help=\toks33
\row@=\count141
\column@=\count142
\maxfields@=\count143
\andhelp@=\toks34
\eqnshift@=\dimen172
\alignsep@=\dimen173
\tagshift@=\dimen174
\tagwidth@=\dimen175
\totwidth@=\dimen176
\lineht@=\dimen177
\@envbody=\toks35
\multlinegap=\skip52
\multlinetaggap=\skip53
\mathdisplay@stack=\toks36
LaTeX Info: Redefining \[ on input line 2817.
LaTeX Info: Redefining \] on input line 2818.
) (/usr/local/texlive/2018/texmf-dist/tex/latex/seqsplit/seqsplit.sty
Package: seqsplit 2006/08/07 v0.1 Splitting long sequences (DNA, RNA, proteins,
 etc.) 
) (/usr/local/texlive/2018/texmf-dist/tex/latex/base/fixltx2e.sty
Package: fixltx2e 2016/12/29 v2.1a fixes to LaTeX (obsolete)
Applying: [2015/01/01] Old fixltx2e package on input line 46.

Package fixltx2e Warning: fixltx2e is not required with releases after 2015
(fixltx2e)                All fixes are now in the LaTeX kernel.
(fixltx2e)                See the latexrelease package for details.

Already applied: [0000/00/00] Old fixltx2e package on input line 53.
) (/usr/local/texlive/2018/texmf-dist/tex/latex/biblatex/biblatex.sty
Package: biblatex 2018/03/04 v3.11 programmable bibliographies (PK/MW)
(/usr/local/texlive/2018/texmf-dist/tex/latex/logreq/logreq.sty
Package: logreq 2010/08/04 v1.0 xml request logger
\lrq@indent=\count144
(/usr/local/texlive/2018/texmf-dist/tex/latex/logreq/logreq.def
File: logreq.def 2010/08/04 v1.0 logreq spec v1.0
)) (/usr/local/texlive/2018/texmf-dist/tex/latex/base/ifthen.sty
Package: ifthen 2014/09/29 v1.1c Standard LaTeX ifthen package (DPC)
) (/usr/local/texlive/2018/texmf-dist/tex/generic/xstring/xstring.sty (/usr/loc
al/texlive/2018/texmf-dist/tex/generic/xstring/xstring.tex
\@xs@message=\write6
\integerpart=\count145
\decimalpart=\count146
)
Package: xstring 2013/10/13  v1.7c  String manipulations (C Tellechea)
)
\c@tabx@nest=\count147
\c@listtotal=\count148
\c@listcount=\count149
\c@liststart=\count150
\c@liststop=\count151
\c@citecount=\count152
\c@citetotal=\count153
\c@multicitecount=\count154
\c@multicitetotal=\count155
\c@instcount=\count156
\c@maxnames=\count157
\c@minnames=\count158
\c@maxitems=\count159
\c@minitems=\count160
\c@citecounter=\count161
\c@savedcitecounter=\count162
\c@uniquelist=\count163
\c@uniquename=\count164
\c@refsection=\count165
\c@refsegment=\count166
\c@maxextratitle=\count167
\c@maxextratitleyear=\count168
\c@maxextradate=\count169
\c@maxextraalpha=\count170
\c@abbrvpenalty=\count171
\c@highnamepenalty=\count172
\c@lownamepenalty=\count173
\c@maxparens=\count174
\c@parenlevel=\count175
\blx@tempcnta=\count176
\blx@tempcntb=\count177
\blx@tempcntc=\count178
\blx@maxsection=\count179
\blx@maxsegment@0=\count180
\blx@notetype=\count181
\blx@parenlevel@text=\count182
\blx@parenlevel@foot=\count183
\blx@sectionciteorder@0=\count184
\labelnumberwidth=\skip54
\labelalphawidth=\skip55
\biblabelsep=\skip56
\bibitemsep=\skip57
\bibnamesep=\skip58
\bibinitsep=\skip59
\bibparsep=\skip60
\bibhang=\skip61
\blx@bcfin=\read3
\blx@bcfout=\write7
\c@mincomprange=\count185
\c@maxcomprange=\count186
\c@mincompwidth=\count187
Package biblatex Info: Trying to load biblatex default data model...
Package biblatex Info: ... file 'blx-dm.def' found.
(/usr/local/texlive/2018/texmf-dist/tex/latex/biblatex/blx-dm.def
File: blx-dm.def 2018/03/04 v3.11 biblatex localization (PK/MW)
)
Package biblatex Info: Trying to load biblatex custom data model...
Package biblatex Info: ... file 'biblatex-dm.cfg' not found.
\c@afterword=\count188
\c@savedafterword=\count189
\c@annotator=\count190
\c@savedannotator=\count191
\c@author=\count192
\c@savedauthor=\count193
\c@bookauthor=\count194
\c@savedbookauthor=\count195
\c@commentator=\count196
\c@savedcommentator=\count197
\c@editor=\count198
\c@savededitor=\count199
\c@editora=\count266
\c@savededitora=\count267
\c@editorb=\count268
\c@savededitorb=\count269
\c@editorc=\count270
\c@savededitorc=\count271
\c@foreword=\count272
\c@savedforeword=\count273
\c@holder=\count274
\c@savedholder=\count275
\c@introduction=\count276
\c@savedintroduction=\count277
\c@namea=\count278
\c@savednamea=\count279
\c@nameb=\count280
\c@savednameb=\count281
\c@namec=\count282
\c@savednamec=\count283
\c@translator=\count284
\c@savedtranslator=\count285
\c@shortauthor=\count286
\c@savedshortauthor=\count287
\c@shorteditor=\count288
\c@savedshorteditor=\count289
\c@labelname=\count290
\c@savedlabelname=\count291
\c@institution=\count292
\c@savedinstitution=\count293
\c@lista=\count294
\c@savedlista=\count295
\c@listb=\count296
\c@savedlistb=\count297
\c@listc=\count298
\c@savedlistc=\count299
\c@listd=\count300
\c@savedlistd=\count301
\c@liste=\count302
\c@savedliste=\count303
\c@listf=\count304
\c@savedlistf=\count305
\c@location=\count306
\c@savedlocation=\count307
\c@organization=\count308
\c@savedorganization=\count309
\c@origlocation=\count310
\c@savedoriglocation=\count311
\c@origpublisher=\count312
\c@savedorigpublisher=\count313
\c@publisher=\count314
\c@savedpublisher=\count315
\c@language=\count316
\c@savedlanguage=\count317
\c@origlanguage=\count318
\c@savedoriglanguage=\count319
\c@pageref=\count320
\c@savedpageref=\count321
\shorthandwidth=\skip62
\shortjournalwidth=\skip63
\shortserieswidth=\skip64
\shorttitlewidth=\skip65
\shortauthorwidth=\skip66
\shorteditorwidth=\skip67
\locallabelnumberwidth=\skip68
\locallabelalphawidth=\skip69
\localshorthandwidth=\skip70
\localshortjournalwidth=\skip71
\localshortserieswidth=\skip72
\localshorttitlewidth=\skip73
\localshortauthorwidth=\skip74
\localshorteditorwidth=\skip75
Package biblatex Info: Trying to load enhanced support for Unicode engines...
Package biblatex Info: ... file 'blx-unicode.def' found.
(/usr/local/texlive/2018/texmf-dist/tex/latex/biblatex/blx-unicode.def)
Package biblatex Info: Trying to load compatibility code...
Package biblatex Info: ... file 'blx-compat.def' found.
(/usr/local/texlive/2018/texmf-dist/tex/latex/biblatex/blx-compat.def
File: blx-compat.def 2018/03/04 v3.11 biblatex compatibility (PK/MW)
)
Package biblatex Info: Trying to load generic definitions...
Package biblatex Info: ... file 'biblatex.def' found.
(/usr/local/texlive/2018/texmf-dist/tex/latex/biblatex/biblatex.def
File: biblatex.def 2018/03/04 v3.11 biblatex compatibility (PK/MW)
\c@textcitecount=\count322
\c@textcitetotal=\count323
\c@textcitemaxnames=\count324
\c@biburlnumpenalty=\count325
\c@biburlucpenalty=\count326
\c@biburllcpenalty=\count327
\c@smartand=\count328
)
Package biblatex Info: Trying to load bibliography style 'numeric'...
Package biblatex Info: ... file 'numeric.bbx' found.
(/usr/local/texlive/2018/texmf-dist/tex/latex/biblatex/bbx/numeric.bbx
File: numeric.bbx 2018/03/04 v3.11 biblatex bibliography style (PK/MW)
Package biblatex Info: Trying to load bibliography style 'standard'...
Package biblatex Info: ... file 'standard.bbx' found.
(/usr/local/texlive/2018/texmf-dist/tex/latex/biblatex/bbx/standard.bbx
File: standard.bbx 2018/03/04 v3.11 biblatex bibliography style (PK/MW)
\c@bbx:relatedcount=\count329
\c@bbx:relatedtotal=\count330
))
Package biblatex Info: Trying to load citation style 'numeric'...
Package biblatex Info: ... file 'numeric.cbx' found.
(/usr/local/texlive/2018/texmf-dist/tex/latex/biblatex/cbx/numeric.cbx
File: numeric.cbx 2018/03/04 v3.11 biblatex citation style (PK/MW)
Package biblatex Info: Redefining '\cite'.
Package biblatex Info: Redefining '\parencite'.
Package biblatex Info: Redefining '\footcite'.
Package biblatex Info: Redefining '\footcitetext'.
Package biblatex Info: Redefining '\smartcite'.
Package biblatex Info: Redefining '\supercite'.
Package biblatex Info: Redefining '\textcite'.
Package biblatex Info: Redefining '\textcites'.
Package biblatex Info: Redefining '\cites'.
Package biblatex Info: Redefining '\parencites'.
Package biblatex Info: Redefining '\smartcites'.
)
Package biblatex Info: Trying to load configuration file...
Package biblatex Info: ... file 'biblatex.cfg' found.
(/usr/local/texlive/2018/texmf-dist/tex/latex/biblatex/biblatex.cfg
File: biblatex.cfg 
)) (/usr/local/texlive/2018/texmf-dist/tex/latex/geometry/geometry.sty
Package: geometry 2018/03/24 v5.7 Page Geometry
\Gm@cnth=\count331
\Gm@cntv=\count332
\c@Gm@tempcnt=\count333
\Gm@bindingoffset=\dimen178
\Gm@wd@mp=\dimen179
\Gm@odd@mp=\dimen180
\Gm@even@mp=\dimen181
\Gm@layoutwidth=\dimen182
\Gm@layoutheight=\dimen183
\Gm@layouthoffset=\dimen184
\Gm@layoutvoffset=\dimen185
\Gm@dimlist=\toks37
) (/usr/local/texlive/2018/texmf-dist/tex/latex/fancyhdr/fancyhdr.sty
Package: fancyhdr 2017/06/30 v3.9a Extensive control of page headers and footer
s
\f@nch@headwidth=\skip76
\f@nch@O@elh=\skip77
\f@nch@O@erh=\skip78
\f@nch@O@olh=\skip79
\f@nch@O@orh=\skip80
\f@nch@O@elf=\skip81
\f@nch@O@erf=\skip82
\f@nch@O@olf=\skip83
\f@nch@O@orf=\skip84
) (/usr/local/texlive/2018/texmf-dist/tex/xelatex/mathspec/mathspec.sty
Package: mathspec 2016/12/22 v0.2b LaTeX Package (Mathematics font selection fo
r XeLaTeX)
(/usr/local/texlive/2018/texmf-dist/tex/latex/fontspec/fontspec.sty (/usr/local
/texlive/2018/texmf-dist/tex/latex/l3packages/xparse/xparse.sty (/usr/local/tex
live/2018/texmf-dist/tex/latex/l3kernel/expl3.sty
Package: expl3 2018/03/05 L3 programming layer (loader) 
(/usr/local/texlive/2018/texmf-dist/tex/latex/l3kernel/expl3-code.tex
Package: expl3 2018/03/05 L3 programming layer (code)
\c_max_int=\count334
\l_tmpa_int=\count335
\l_tmpb_int=\count336
\g_tmpa_int=\count337
\g_tmpb_int=\count338
\g__intarray_font_int=\count339
\g__prg_map_int=\count340
\c_log_iow=\count341
\l_iow_line_count_int=\count342
\l__iow_line_target_int=\count343
\l__iow_one_indent_int=\count344
\l__iow_indent_int=\count345
\c_zero_dim=\dimen186
\c_max_dim=\dimen187
\l_tmpa_dim=\dimen188
\l_tmpb_dim=\dimen189
\g_tmpa_dim=\dimen190
\g_tmpb_dim=\dimen191
\c_zero_skip=\skip85
\c_max_skip=\skip86
\l_tmpa_skip=\skip87
\l_tmpb_skip=\skip88
\g_tmpa_skip=\skip89
\g_tmpb_skip=\skip90
\c_zero_muskip=\muskip12
\c_max_muskip=\muskip13
\l_tmpa_muskip=\muskip14
\l_tmpb_muskip=\muskip15
\g_tmpa_muskip=\muskip16
\g_tmpb_muskip=\muskip17
\l_keys_choice_int=\count346
\c__fp_leading_shift_int=\count347
\c__fp_middle_shift_int=\count348
\c__fp_trailing_shift_int=\count349
\c__fp_big_leading_shift_int=\count350
\c__fp_big_middle_shift_int=\count351
\c__fp_big_trailing_shift_int=\count352
\c__fp_Bigg_leading_shift_int=\count353
\c__fp_Bigg_middle_shift_int=\count354
\c__fp_Bigg_trailing_shift_int=\count355
\l__sort_length_int=\count356
\l__sort_min_int=\count357
\l__sort_top_int=\count358
\l__sort_max_int=\count359
\l__sort_true_max_int=\count360
\l__sort_block_int=\count361
\l__sort_begin_int=\count362
\l__sort_end_int=\count363
\l__sort_A_int=\count364
\l__sort_B_int=\count365
\l__sort_C_int=\count366
\l__tl_build_start_index_int=\count367
\l__tl_build_index_int=\count368
\l__tl_analysis_normal_int=\count369
\l__tl_analysis_index_int=\count370
\l__tl_analysis_nesting_int=\count371
\l__tl_analysis_type_int=\count372
\l__regex_internal_a_int=\count373
\l__regex_internal_b_int=\count374
\l__regex_internal_c_int=\count375
\l__regex_balance_int=\count376
\l__regex_group_level_int=\count377
\l__regex_mode_int=\count378
\c__regex_cs_in_class_mode_int=\count379
\c__regex_cs_mode_int=\count380
\l__regex_catcodes_int=\count381
\l__regex_default_catcodes_int=\count382
\c__regex_catcode_L_int=\count383
\c__regex_catcode_O_int=\count384
\c__regex_catcode_A_int=\count385
\c__regex_all_catcodes_int=\count386
\l__regex_show_lines_int=\count387
\l__regex_min_state_int=\count388
\l__regex_max_state_int=\count389
\l__regex_left_state_int=\count390
\l__regex_right_state_int=\count391
\l__regex_capturing_group_int=\count392
\l__regex_min_pos_int=\count393
\l__regex_max_pos_int=\count394
\l__regex_curr_pos_int=\count395
\l__regex_start_pos_int=\count396
\l__regex_success_pos_int=\count397
\l__regex_curr_char_int=\count398
\l__regex_curr_catcode_int=\count399
\l__regex_last_char_int=\count400
\l__regex_case_changed_char_int=\count401
\l__regex_curr_state_int=\count402
\l__regex_step_int=\count403
\l__regex_min_active_int=\count404
\l__regex_max_active_int=\count405
\l__regex_replacement_csnames_int=\count406
\l__regex_match_count_int=\count407
\l__regex_min_submatch_int=\count408
\l__regex_submatch_int=\count409
\l__regex_zeroth_submatch_int=\count410
\g__regex_trace_regex_int=\count411
\c_empty_box=\box49
\l_tmpa_box=\box50
\l_tmpb_box=\box51
\g_tmpa_box=\box52
\g_tmpb_box=\box53
\l__box_top_dim=\dimen192
\l__box_bottom_dim=\dimen193
\l__box_left_dim=\dimen194
\l__box_right_dim=\dimen195
\l__box_top_new_dim=\dimen196
\l__box_bottom_new_dim=\dimen197
\l__box_left_new_dim=\dimen198
\l__box_right_new_dim=\dimen199
\l__box_internal_box=\box54
\l__coffin_internal_box=\box55
\l__coffin_internal_dim=\dimen256
\l__coffin_offset_x_dim=\dimen257
\l__coffin_offset_y_dim=\dimen258
\l__coffin_x_dim=\dimen259
\l__coffin_y_dim=\dimen260
\l__coffin_x_prime_dim=\dimen261
\l__coffin_y_prime_dim=\dimen262
\c_empty_coffin=\box56
\l__coffin_aligned_coffin=\box57
\l__coffin_aligned_internal_coffin=\box58
\l_tmpa_coffin=\box59
\l_tmpb_coffin=\box60
\l__coffin_display_coffin=\box61
\l__coffin_display_coord_coffin=\box62
\l__coffin_display_pole_coffin=\box63
\l__coffin_display_offset_dim=\dimen263
\l__coffin_display_x_dim=\dimen264
\l__coffin_display_y_dim=\dimen265
\l__coffin_bounding_shift_dim=\dimen266
\l__coffin_left_corner_dim=\dimen267
\l__coffin_right_corner_dim=\dimen268
\l__coffin_bottom_corner_dim=\dimen269
\l__coffin_top_corner_dim=\dimen270
\l__coffin_scaled_total_height_dim=\dimen271
\l__coffin_scaled_width_dim=\dimen272
) (/usr/local/texlive/2018/texmf-dist/tex/latex/l3kernel/l3xdvipdfmx.def
File: l3xdvidpfmx.def 2017/03/18 v L3 Experimental driver: xdvipdfmx
\g__driver_image_int=\count412
))
Package: xparse 2018/02/21 L3 Experimental document command parser
\l__xparse_current_arg_int=\count413
\g__xparse_grabber_int=\count414
\l__xparse_m_args_int=\count415
\l__xparse_mandatory_args_int=\count416
\l__xparse_v_nesting_int=\count417
)
Package: fontspec 2017/11/09 v2.6g Font selection for XeLaTeX and LuaLaTeX
(/usr/local/texlive/2018/texmf-dist/tex/latex/fontspec/fontspec-xetex.sty
Package: fontspec-xetex 2017/11/09 v2.6g Font selection for XeLaTeX and LuaLaTe
X
\l__fontspec_script_int=\count418
\l__fontspec_language_int=\count419
\l__fontspec_strnum_int=\count420
\l__fontspec_tmp_int=\count421
\l__fontspec_em_int=\count422
\l__fontspec_emdef_int=\count423
\l__fontspec_strong_int=\count424
\l__fontspec_strongdef_int=\count425
\l__fontspec_tmpa_dim=\dimen273
\l__fontspec_tmpb_dim=\dimen274
\l__fontspec_tmpc_dim=\dimen275
\g__file_internal_ior=\read4
(/usr/local/texlive/2018/texmf-dist/tex/latex/base/fontenc.sty
Package: fontenc 2017/04/05 v2.0i Standard LaTeX package
(/usr/local/texlive/2018/texmf-dist/tex/latex/base/tuenc.def
File: tuenc.def 2017/04/05 v2.0i Standard LaTeX file
LaTeX Font Info:    Redeclaring font encoding TU on input line 82.
))
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \fontspec with sig. 'O{}mO{}' on line 542.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \setmainfont with sig. 'O{}mO{}' on line 546.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \setsansfont with sig. 'O{}mO{}' on line 550.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \setmonofont with sig. 'O{}mO{}' on line 554.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \setmathrm with sig. 'O{}mO{}' on line 558.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \setboldmathrm with sig. 'O{}mO{}' on line 562.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \setmathsf with sig. 'O{}mO{}' on line 566.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \setmathtt with sig. 'O{}mO{}' on line 570.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \setromanfont with sig. 'O{}mO{}' on line 574.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \newfontfamily with sig. 'mO{}mO{}' on line 578.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \newfontface with sig. 'mO{}mO{}' on line 582.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \defaultfontfeatures with sig. 't+om' on line 586.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \addfontfeatures with sig. 'm' on line 590.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \addfontfeature with sig. 'm' on line 594.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \newfontfeature with sig. 'mm' on line 598.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \newAATfeature with sig. 'mmmm' on line 602.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \newopentypefeature with sig. 'mmm' on line 606.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \newICUfeature with sig. 'mmm' on line 610.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \aliasfontfeature with sig. 'mm' on line 614.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \aliasfontfeatureoption with sig. 'mmm' on line 618.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \newfontscript with sig. 'mm' on line 622.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \newfontlanguage with sig. 'mm' on line 626.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \DeclareFontsExtensions with sig. 'm' on line 630.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \IfFontFeatureActiveTF with sig. 'mmm' on line 634.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \EncodingCommand with sig. 'mO{}m' on line 3632.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \EncodingAccent with sig. 'mm' on line 3638.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \EncodingSymbol with sig. 'mm' on line 3644.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \EncodingComposite with sig. 'mmm' on line 3650.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \EncodingCompositeCommand with sig. 'mmm' on line 3656.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \DeclareUnicodeEncoding with sig. 'mm' on line 3681.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \UndeclareSymbol with sig. 'm' on line 3687.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \UndeclareAccent with sig. 'm' on line 3693.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \UndeclareCommand with sig. 'm' on line 3699.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \UndeclareComposite with sig. 'mm' on line 3706.
.................................................
(/usr/local/texlive/2018/texmf-dist/tex/latex/fontspec/fontspec.cfg)
LaTeX Info: Redefining \itshape on input line 3891.
LaTeX Info: Redefining \slshape on input line 3896.
LaTeX Info: Redefining \scshape on input line 3901.
LaTeX Info: Redefining \upshape on input line 3906.
LaTeX Info: Redefining \em on input line 3936.
LaTeX Info: Redefining \emph on input line 3961.
LaTeX Info: Redefining \- on input line 4015.
.................................................
. LaTeX info: "xparse/redefine-command"
. 
. Redefining command \oldstylenums with sig. 'm' on line 4110.
.................................................
.................................................
. LaTeX info: "xparse/define-command"
. 
. Defining command \liningnums with sig. 'm' on line 4114.
.................................................
)) (/usr/local/texlive/2018/texmf-dist/tex/latex/xkeyval/xkeyval.sty
Package: xkeyval 2014/12/03 v2.7a package option processing (HA)
(/usr/local/texlive/2018/texmf-dist/tex/generic/xkeyval/xkeyval.tex (/usr/local
/texlive/2018/texmf-dist/tex/generic/xkeyval/xkvutils.tex
\XKV@toks=\toks38
\XKV@tempa@toks=\toks39
)
\XKV@depth=\count426
File: xkeyval.tex 2014/12/03 v2.7a key=value parser (HA)
))
\c@eu@=\count427
\c@eu@i=\count428
\c@mkern=\count429
) (/usr/local/texlive/2018/texmf-dist/tex/latex/upquote/upquote.sty
Package: upquote 2012/04/19 v1.3 upright-quote and grave-accent glyphs in verba
tim
(/usr/local/texlive/2018/texmf-dist/tex/latex/base/textcomp.sty
Package: textcomp 2017/04/05 v2.0i Standard LaTeX package
Package textcomp Info: Sub-encoding information:
(textcomp)               5 = only ISO-Adobe without \textcurrency
(textcomp)               4 = 5 + \texteuro
(textcomp)               3 = 4 + \textohm
(textcomp)               2 = 3 + \textestimated + \textcurrency
(textcomp)               1 = TS1 - \textcircled - \t
(textcomp)               0 = TS1 (full)
(textcomp)             Font families with sub-encoding setting implement
(textcomp)             only a restricted character set as indicated.
(textcomp)             Family '?' is the default used for unknown fonts.
(textcomp)             See the documentation for details.
Package textcomp Info: Setting ? sub-encoding to TS1/1 on input line 79.
(/usr/local/texlive/2018/texmf-dist/tex/latex/base/ts1enc.def
File: ts1enc.def 2001/06/05 v3.0e (jk/car/fm) Standard LaTeX file
)
LaTeX Info: Redefining \oldstylenums on input line 334.
Package textcomp Info: Setting cmr sub-encoding to TS1/0 on input line 349.
Package textcomp Info: Setting cmss sub-encoding to TS1/0 on input line 350.
Package textcomp Info: Setting cmtt sub-encoding to TS1/0 on input line 351.
Package textcomp Info: Setting cmvtt sub-encoding to TS1/0 on input line 352.
Package textcomp Info: Setting cmbr sub-encoding to TS1/0 on input line 353.
Package textcomp Info: Setting cmtl sub-encoding to TS1/0 on input line 354.
Package textcomp Info: Setting ccr sub-encoding to TS1/0 on input line 355.
Package textcomp Info: Setting ptm sub-encoding to TS1/4 on input line 356.
Package textcomp Info: Setting pcr sub-encoding to TS1/4 on input line 357.
Package textcomp Info: Setting phv sub-encoding to TS1/4 on input line 358.
Package textcomp Info: Setting ppl sub-encoding to TS1/3 on input line 359.
Package textcomp Info: Setting pag sub-encoding to TS1/4 on input line 360.
Package textcomp Info: Setting pbk sub-encoding to TS1/4 on input line 361.
Package textcomp Info: Setting pnc sub-encoding to TS1/4 on input line 362.
Package textcomp Info: Setting pzc sub-encoding to TS1/4 on input line 363.
Package textcomp Info: Setting bch sub-encoding to TS1/4 on input line 364.
Package textcomp Info: Setting put sub-encoding to TS1/5 on input line 365.
Package textcomp Info: Setting uag sub-encoding to TS1/5 on input line 366.
Package textcomp Info: Setting ugq sub-encoding to TS1/5 on input line 367.
Package textcomp Info: Setting ul8 sub-encoding to TS1/4 on input line 368.
Package textcomp Info: Setting ul9 sub-encoding to TS1/4 on input line 369.
Package textcomp Info: Setting augie sub-encoding to TS1/5 on input line 370.
Package textcomp Info: Setting dayrom sub-encoding to TS1/3 on input line 371.
Package textcomp Info: Setting dayroms sub-encoding to TS1/3 on input line 372.

Package textcomp Info: Setting pxr sub-encoding to TS1/0 on input line 373.
Package textcomp Info: Setting pxss sub-encoding to TS1/0 on input line 374.
Package textcomp Info: Setting pxtt sub-encoding to TS1/0 on input line 375.
Package textcomp Info: Setting txr sub-encoding to TS1/0 on input line 376.
Package textcomp Info: Setting txss sub-encoding to TS1/0 on input line 377.
Package textcomp Info: Setting txtt sub-encoding to TS1/0 on input line 378.
Package textcomp Info: Setting lmr sub-encoding to TS1/0 on input line 379.
Package textcomp Info: Setting lmdh sub-encoding to TS1/0 on input line 380.
Package textcomp Info: Setting lmss sub-encoding to TS1/0 on input line 381.
Package textcomp Info: Setting lmssq sub-encoding to TS1/0 on input line 382.
Package textcomp Info: Setting lmvtt sub-encoding to TS1/0 on input line 383.
Package textcomp Info: Setting lmtt sub-encoding to TS1/0 on input line 384.
Package textcomp Info: Setting qhv sub-encoding to TS1/0 on input line 385.
Package textcomp Info: Setting qag sub-encoding to TS1/0 on input line 386.
Package textcomp Info: Setting qbk sub-encoding to TS1/0 on input line 387.
Package textcomp Info: Setting qcr sub-encoding to TS1/0 on input line 388.
Package textcomp Info: Setting qcs sub-encoding to TS1/0 on input line 389.
Package textcomp Info: Setting qpl sub-encoding to TS1/0 on input line 390.
Package textcomp Info: Setting qtm sub-encoding to TS1/0 on input line 391.
Package textcomp Info: Setting qzc sub-encoding to TS1/0 on input line 392.
Package textcomp Info: Setting qhvc sub-encoding to TS1/0 on input line 393.
Package textcomp Info: Setting futs sub-encoding to TS1/4 on input line 394.
Package textcomp Info: Setting futx sub-encoding to TS1/4 on input line 395.
Package textcomp Info: Setting futj sub-encoding to TS1/4 on input line 396.
Package textcomp Info: Setting hlh sub-encoding to TS1/3 on input line 397.
Package textcomp Info: Setting hls sub-encoding to TS1/3 on input line 398.
Package textcomp Info: Setting hlst sub-encoding to TS1/3 on input line 399.
Package textcomp Info: Setting hlct sub-encoding to TS1/5 on input line 400.
Package textcomp Info: Setting hlx sub-encoding to TS1/5 on input line 401.
Package textcomp Info: Setting hlce sub-encoding to TS1/5 on input line 402.
Package textcomp Info: Setting hlcn sub-encoding to TS1/5 on input line 403.
Package textcomp Info: Setting hlcw sub-encoding to TS1/5 on input line 404.
Package textcomp Info: Setting hlcf sub-encoding to TS1/5 on input line 405.
Package textcomp Info: Setting pplx sub-encoding to TS1/3 on input line 406.
Package textcomp Info: Setting pplj sub-encoding to TS1/3 on input line 407.
Package textcomp Info: Setting ptmx sub-encoding to TS1/4 on input line 408.
Package textcomp Info: Setting ptmj sub-encoding to TS1/4 on input line 409.
)) (/usr/local/texlive/2018/texmf-dist/tex/latex/microtype/microtype.sty
Package: microtype 2018/01/14 v2.7a Micro-typographical refinements (RS)
\MT@toks=\toks40
\MT@count=\count430
LaTeX Info: Redefining \textls on input line 793.
\MT@outer@kern=\dimen276
LaTeX Info: Redefining \textmicrotypecontext on input line 1339.
\MT@listname@count=\count431
(/usr/local/texlive/2018/texmf-dist/tex/latex/microtype/microtype-xetex.def
File: microtype-xetex.def 2018/01/14 v2.7a Definitions specific to xetex (RS)
LaTeX Info: Redefining \lsstyle on input line 256.
)
Package microtype Info: Loading configuration file microtype.cfg.
(/usr/local/texlive/2018/texmf-dist/tex/latex/microtype/microtype.cfg
File: microtype.cfg 2018/01/14 v2.7a microtype main configuration file (RS)
))
Package hyperref Info: Option `unicode' set `true' on input line 151.
(/usr/local/texlive/2018/texmf-dist/tex/generic/oberdiek/se-ascii-print.def
File: se-ascii-print.def 2016/05/16 v1.11 stringenc: Printable ASCII characters

)
Package hyperref Info: Option `breaklinks' set `true' on input line 151.
(/usr/local/texlive/2018/texmf-dist/tex/latex/oberdiek/grffile.sty
Package: grffile 2017/06/30 v1.18 Extended file name support for graphics (HO)
Package grffile Info: Option `multidot' is set to `true'.
Package grffile Info: Option `extendedchars' is set to `false'.
Package grffile Info: Option `space' is set to `true'.
Package grffile Info: \Gin@ii of package `graphicx' fixed on input line 494.
) (/usr/local/texlive/2018/texmf-dist/tex/latex/parskip/parskip.sty
Package: parskip 2001/04/09 non-zero parskip adjustments
)
\csllabelwidth=\skip91
\cslhangindent=\skip92
Package biblatex Info: Trying to load language 'english'...
Package biblatex Info: ... file 'english.lbx' found.
(/usr/local/texlive/2018/texmf-dist/tex/latex/biblatex/lbx/english.lbx
File: english.lbx 2018/03/04 v3.11 biblatex localization (PK/MW)
)
\@quotelevel=\count432
\@quotereset=\count433
(./paper.aux)
\openout1 = `paper.aux'.

LaTeX Font Info:    Checking defaults for OML/cmm/m/it on input line 246.
LaTeX Font Info:    ... okay on input line 246.
LaTeX Font Info:    Checking defaults for T1/cmr/m/n on input line 246.
LaTeX Font Info:    ... okay on input line 246.
LaTeX Font Info:    Checking defaults for OT1/cmr/m/n on input line 246.
LaTeX Font Info:    ... okay on input line 246.
LaTeX Font Info:    Checking defaults for OMS/cmsy/m/n on input line 246.
LaTeX Font Info:    ... okay on input line 246.
LaTeX Font Info:    Checking defaults for TU/lmr/m/n on input line 246.
LaTeX Font Info:    ... okay on input line 246.
LaTeX Font Info:    Checking defaults for OMX/cmex/m/n on input line 246.
LaTeX Font Info:    ... okay on input line 246.
LaTeX Font Info:    Checking defaults for U/cmr/m/n on input line 246.
LaTeX Font Info:    ... okay on input line 246.
LaTeX Font Info:    Checking defaults for PD1/pdf/m/n on input line 246.
LaTeX Font Info:    ... okay on input line 246.
LaTeX Font Info:    Checking defaults for PU/pdf/m/n on input line 246.
LaTeX Font Info:    ... okay on input line 246.
LaTeX Font Info:    Checking defaults for TS1/cmr/m/n on input line 246.
LaTeX Font Info:    Try loading font information for TS1+cmr on input line 246.

(/usr/local/texlive/2018/texmf-dist/tex/latex/base/ts1cmr.fd
File: ts1cmr.fd 2014/09/29 v2.5h Standard LaTeX font definitions
)
LaTeX Font Info:    ... okay on input line 246.
Package marginnote Info: \pdfoutput not 0 or unimportant and
(marginnote)             \pdflastxpos or \lastxpos available.
(marginnote)             Extended position detection mode activated.
ABD: EveryShipout initializing macros
\AtBeginShipoutBox=\box64
Package hyperref Info: Link coloring ON on input line 246.
(/usr/local/texlive/2018/texmf-dist/tex/latex/hyperref/nameref.sty
Package: nameref 2016/05/21 v2.44 Cross-referencing by name of section
(/usr/local/texlive/2018/texmf-dist/tex/generic/oberdiek/gettitlestring.sty
Package: gettitlestring 2016/05/16 v1.5 Cleanup title references (HO)
)
\c@section@level=\count434
)
LaTeX Info: Redefining \ref on input line 246.
LaTeX Info: Redefining \pageref on input line 246.
LaTeX Info: Redefining \nameref on input line 246.
(./paper.out) (./paper.out)
\@outlinefile=\write8
\openout8 = `paper.out'.

Package caption Info: Begin \AtBeginDocument code.
Package caption Info: End \AtBeginDocument code.
Package biblatex Info: XeTeX detected.
(biblatex)             Assuming input encoding 'utf8'.
Package biblatex Info: Automatic encoding selection.
(biblatex)             Assuming data encoding 'utf8'.
\openout7 = `paper.bcf'.

Package biblatex Info: Trying to load bibliographic data...
Package biblatex Info: ... file 'paper.bbl' not found.
No file paper.bbl.
Package biblatex Info: Reference section=0 on input line 246.
Package biblatex Info: Reference segment=0 on input line 246.
*geometry* driver: auto-detecting
*geometry* detected driver: xetex
*geometry* verbose mode - [ preamble ] result:
* driver: xetex
* paper: a4paper
* layout: <same size as paper>
* layoutoffset:(h,v)=(0.0pt,0.0pt)
* modes: includemp 
* h-part:(L,W,R)=(28.45274pt, 526.376pt, 42.67912pt)
* v-part:(T,H,B)=(99.58464pt, 660.10394pt, 85.35826pt)
* \paperwidth=597.50787pt
* \paperheight=845.04684pt
* \textwidth=387.33861pt
* \textheight=660.10394pt
* \oddsidemargin=95.22015pt
* \evensidemargin=95.22015pt
* \topmargin=-60.28131pt
* \headheight=62.59596pt
* \headsep=25.0pt
* \topskip=10.0pt
* \footskip=30.0pt
* \marginparwidth=128.0374pt
* \marginparsep=11.0pt
* \columnsep=10.0pt
* \skip\footins=9.0pt plus 4.0pt minus 2.0pt
* \hoffset=0.0pt
* \voffset=0.0pt
* \mag=1000
* \@twocolumnfalse
* \@twosidefalse
* \@mparswitchfalse
* \@reversemargintrue
* (1in=72.27pt=25.4mm, 1cm=28.453pt)

LaTeX Info: Redefining \microtypecontext on input line 246.
Package microtype Info: Character protrusion enabled (level 2).
Package microtype Info: Using protrusion set `basicmath'.
Package microtype Info: No adjustment of tracking.
Package microtype Info: No adjustment of spacing.
Package microtype Info: No adjustment of kerning.

(/usr/local/texlive/2018/texmf-dist/tex/latex/microtype/mt-LatinModernRoman.cfg
File: mt-LatinModernRoman.cfg 2013/03/13 v1.0 microtype config. file: Latin Mod
ern Roman (RS)
)
Package microtype Info: Loading generic protrusion settings for font family
(microtype)             `lmss' (encoding: TU).
(microtype)             For optimal results, create family-specific settings.
(microtype)             See the microtype manual for details.
(/usr/local/texlive/2018/texmf-dist/tex/latex/microtype/mt-cmr.cfg
File: mt-cmr.cfg 2013/05/19 v2.2 microtype config. file: Computer Modern Roman 
(RS)
)
LaTeX Font Info:    Try loading font information for U+msa on input line 247.
(/usr/local/texlive/2018/texmf-dist/tex/latex/amsfonts/umsa.fd
File: umsa.fd 2013/01/14 v3.01 AMS symbols A
) (/usr/local/texlive/2018/texmf-dist/tex/latex/microtype/mt-msa.cfg
File: mt-msa.cfg 2006/02/04 v1.1 microtype config. file: AMS symbols (a) (RS)
)
LaTeX Font Info:    Try loading font information for U+msb on input line 247.
(/usr/local/texlive/2018/texmf-dist/tex/latex/amsfonts/umsb.fd
File: umsb.fd 2013/01/14 v3.01 AMS symbols B
) (/usr/local/texlive/2018/texmf-dist/tex/latex/microtype/mt-msb.cfg
File: mt-msb.cfg 2005/06/01 v1.0 microtype config. file: AMS symbols (b) (RS)
)

Package hyperref Warning: Suppressing link with empty target on input line 273.


Package hyperref Warning: Suppressing link with empty target on input line 273.


Package hyperref Warning: Suppressing link with empty target on input line 273.


File: /Library/Frameworks/R.framework/Versions/4.1/Resources/library/rticles/rm
arkdown/templates/joss/resources/JOSS-logo.png Graphic file (type bmp)
</Library/Frameworks/R.framework/Versions/4.1/Resources/library/rticles/rmarkdo
wn/templates/joss/resources/JOSS-logo.png>

Package Fancyhdr Warning: \headheight is too small (62.59596pt): 
 Make it at least 63.55022pt.
 We now make it that large for the rest of the document.
 This may cause the page layout to be inconsistent, however.

LaTeX Font Info:    Font shape `TU/lmss/m/it' in size <8> not available
(Font)              Font shape `TU/lmss/m/sl' tried instead on input line 338.
[1

]
Underfull \hbox (badness 1097) in paragraph at lines 389--397
\TU/lmr/m/n/10 via bootstrapping with the omission of the \TU/lmtt/m/n/10 hypot
hesize() \TU/lmr/m/n/10 step in the pipeline.
 []


Underfull \hbox (badness 2393) in paragraph at lines 389--397
\TU/lmr/m/n/10 The resulting bootstrap distribution can then be visualized with
 \TU/lmtt/m/n/10 visualize()\TU/lmr/m/n/10 ,
 []

File: /Library/Frameworks/R.framework/Versions/4.1/Resources/library/rticles/rm
arkdown/templates/joss/resources/JOSS-logo.png Graphic file (type bmp)
</Library/Frameworks/R.framework/Versions/4.1/Resources/library/rticles/rmarkdo
wn/templates/joss/resources/JOSS-logo.png>

Package Fancyhdr Warning: \headheight is too small (62.59596pt): 
 Make it at least 63.55022pt.
 We now make it that large for the rest of the document.
 This may cause the page layout to be inconsistent, however.

[2]
File: /Library/Frameworks/R.framework/Versions/4.1/Resources/library/rticles/rm
arkdown/templates/joss/resources/JOSS-logo.png Graphic file (type bmp)
</Library/Frameworks/R.framework/Versions/4.1/Resources/library/rticles/rmarkdo
wn/templates/joss/resources/JOSS-logo.png>

Package Fancyhdr Warning: \headheight is too small (62.59596pt): 
 Make it at least 63.55022pt.
 We now make it that large for the rest of the document.
 This may cause the page layout to be inconsistent, however.

[3]
File: /Library/Frameworks/R.framework/Versions/4.1/Resources/library/rticles/rm
arkdown/templates/joss/resources/JOSS-logo.png Graphic file (type bmp)
</Library/Frameworks/R.framework/Versions/4.1/Resources/library/rticles/rmarkdo
wn/templates/joss/resources/JOSS-logo.png>

Package Fancyhdr Warning: \headheight is too small (62.59596pt): 
 Make it at least 63.55022pt.
 We now make it that large for the rest of the document.
 This may cause the page layout to be inconsistent, however.

[4]
Package atveryend Info: Empty hook `BeforeClearDocument' on input line 543.
Package atveryend Info: Empty hook `AfterLastShipout' on input line 543.
(./paper.aux)
Package atveryend Info: Empty hook `AtVeryEndDocument' on input line 543.
Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 543.
Package rerunfilecheck Info: File `paper.out' has not changed.
(rerunfilecheck)             Checksum: 119F0C1EB97B96F62103D82D4B4F1B79.
Package logreq Info: Writing requests to 'paper.run.xml'.
\openout1 = `paper.run.xml'.

 ) 
Here is how much of TeX's memory you used:
 41154 strings out of 492970
 809494 string characters out of 6133938
 1282755 words of memory out of 5000000
 44661 multiletter control sequences out of 15000+600000
 537136 words of font info for 69 fonts, out of 8000000 for 9000
 1348 hyphenation exceptions out of 8191
 55i,12n,73p,10446b,790s stack positions out of 5000i,500n,10000p,200000b,80000s

Output written on paper.pdf (4 pages).


================================================
FILE: figs/paper/paper.md
================================================
---
title: 'infer: An R package for tidyverse-friendly statistical inference'
tags:
  - data science
  - tidyverse
  - inference
  - R
authors:
- name: Simon P. Couch
  orcid: 0000-0001-5676-5107
  affiliation: "1, 2"
- name: Andrew P. Bray
  orcid: 0000-0002-4037-7414
  affiliation: 3
- name: Chester Ismay
  orcid: 0000-0003-2820-2547
  affiliation: 4
- name: Evgeni Chasnovski
  orcid: 0000-0002-1617-4019
  affiliation: 5
- name: Benjamin S. Baumer
  orcid: 0000-0002-3279-0516
  affiliation: 6
- name: Mine Çetinkaya-Rundel
  orcid: 0000-0001-6452-2420
  affiliation: "2, 7"
affiliations:
 - name: Johns Hopkins, Department of Biostatistics
   index: 1
 - name: RStudio
   index: 2
 - name: UC Berkeley, Department of Statistics and Reed College Mathematics Department (on leave)
   index: 3
 - name: Flatiron School
   index: 4
 - name: No Affiliation
   index: 5
 - name: Smith College, Program in Statistical & Data Sciences
   index: 6
 - name: Duke University, Department of Statistical Science
   index: 7

citation_author: Couch et. al.
date: 12 June 2021
year: 2021
bibliography: paper.bib
output: 
  rticles::joss_article:
    keep_tex: true
    includes:
      in_header: columns.tex
csl: apa.csl
journal: JOSS
---

# Summary

`infer` implements an expressive grammar to perform statistical inference that adheres to the `tidyverse` design framework [@wickham2019welcome]. Rather than providing methods for specific statistical tests, this package consolidates the principles that are shared among common hypothesis tests and confidence intervals into a set of four main verbs (functions), supplemented with many utilities to visualize and extract value from their outputs.

# Statement of Need

Packages implementing methods for basic statistical inference in R are highly variable in their interfaces. The structure of inputted data, argument names, expected argument types, argument orders, output types, and spelling cases varies widely both within and among packages. This diversity in approaches obscures the intuition shared among common inferential procedures, makes details of usage difficult to remember, and prevents an expressive and idiomatic coding style.

`infer` is an R package for randomization-based hypothesis testing, naturalizing an intuitive understanding of statistical inference via a unified and expressive grammar. Four functions provide functionality encompassing a large swath of basic frequentist statistical inference, abstracting away details of specific tests and shifting the focus of the analyst to the observed data and the processes that generated it. Such a grammar lends itself to applications in teaching, data pedagogy research, applied scientific research, and advanced predictive modeling. For one, the principled approach of the `infer` package has made it an especially good fit for teaching introductory statistics and data science [@ismay2019statistical; @baumer2020teaching; @cetinkaya2021fresh] and research in data pedagogy [@fergusson2021introducing; @loy2021bringing]. Further, the package has already seen usage in a number of published scientific applications [@mclean2021controlled; @ask2021per; @fallon2021single]. Finally, the package integrates with the greater tidymodels collection of packages, a burgeoning software ecosystem for tidyverse-aligned predictive modeling used across many modern research and industrial applications [@kuhn2020tidymodels]. To date, the package has been downloaded more than 400,000 times.

# Underlying Principles

Regardless of the hypothesis test in question, an analyst asks the same kind of question when conducting statistical inference: is the effect/difference in the observed data real, or due to random chance? To answer this question, the analyst begins by assuming that the effect in the observed data was simply due to random chance, and calls this assumption the *null hypothesis*. (In reality, they might not believe in the null hypothesis at all---the null hypothesis is in opposition to the *alternate hypothesis*, which supposes that the effect present in the observed data is actually due to the fact that "something is going on.") The analyst then calculates a *test statistic* from the data that describes the observed effect. They can use this test statistic to calculate a *p-value* via juxtaposition with a *null distribution*, giving the probability that the observed data could come about if the null hypothesis were true. If this probability is below some pre-defined *significance level* $\alpha$, then the analyst can reject the null hypothesis.

The workflow of this package is designed around this idea. Starting out with some dataset,

+ `specify()` allows the analyst to specify the variable, or relationship between variables, that they are interested in.
+ `hypothesize()` allows the analyst to declare the null hypothesis.
+ `generate()` allows the analyst to generate data reflecting the null hypothesis or using the bootstrap.
+ `calculate()` allows the analyst to calculate summary statistics, either from
     * the observed data, to form the observed test statistic.
     * data `generate()`d to reflect the null hypothesis, to form a randomization-based null distribution of test statistics.

As such, the ultimate output of an infer pipeline using these four functions is generally an _observed statistic_ or _null distribution_ of test statistics. These four functions are thus supplemented with several utilities to visualize and extract value from their outputs.

+ `visualize()` plots the null distribution of test statistics.
     * `shade_p_value()` situates the observed statistic in the null distribution, shading the region as or more extreme.
+ `get_p_value()` calculates a p-value via the juxtaposition of the test statistic and the null distribution.

The workflow outlined above can also be used for constructing confidence intervals via bootstrapping with the omission of the `hypothesize()` step in the pipeline. The resulting bootstrap distribution can then be visualized with `visualize()`, the confidence interval region can be situated in the bootstrap distribution with `shade_confidence_interval()`, and the bounds of the confidence interval can be calculated with `get_confidence_interval()`.

Beyond this, the `infer` package offers:

* methods for inference using theory-based distributions
* shorthand wrappers for common statistical tests using tidy data
* model-fitting workflows to accommodate multiple explanatory variables

# Comparison to Other Packages

Several software packages on the Comprehensive R Archive Network share functionality with `infer` [@CRAN]. `broom` and `parameters` convert model objects to unified output formats, though they do not provide methods for fitting models, describing null distributions, performing bootstrapping, or calculating summary statistics from tabular data [@r-broom; @r-parameters]. `statsExpressions`, and adjacent packages in the `easystats` ecosystem, implement wrappers with consistent interfaces for theory-based hypothesis tests [@r-statsExpressions]. Similarly, `mosaic` is a package used to teach statistics by unifying summary statistics, visualization, and modeling with a consistent API built around R's formula interface. The `mosaic` package also includes functionality to conduct randomization-based inference [@r-mosaic]. At a higher level, though, the structure of each of these packages is defined by model types and statistics, where each model type or statistic has its own associated function and/or object class. In contrast, `infer` is structured around four functions, situating statistics and model types within a more abstracted grammar.^[This grammar follows from Allen Downey's "there is only one test" framework [@downey2016].] 

# Acknowledgements

We acknowledge contributions from Albert Y. Kim, Jo Hardin, Jay Lee, Amelia McNamara, Nick Solomon, and Richie Cotton.

# References


================================================
FILE: infer.Rproj
================================================
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 3
Encoding: UTF-8

RnwWeave: knitr
LaTeX: pdfLaTeX

AutoAppendNewline: Yes
StripTrailingWhitespace: Yes

BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source


================================================
FILE: inst/CITATION
================================================
bibentry(
  "Article",
  title     = "{infer}: An {R} package for tidyverse-friendly statistical inference",
  author    = "Simon P. Couch, Andrew P. Bray, Chester Ismay, Evgeni Chasnovski, Benjamin S. Baumer, Mine Çetinkaya-Rundel",
  journal   = "Journal of Open Source Software",
  year      = 2021,
  volume    = 6,
  number    = 65,
  pages     = 3661,
  doi       = "10.21105/joss.03661",
  textVersion = "Couch et al., (2021). infer: An R package for tidyverse-friendly statistical inference. Journal of Open Source Software, 6(65), 3661, https://doi.org/10.21105/joss.03661"
)


================================================
FILE: man/assume.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/assume.R
\name{assume}
\alias{assume}
\title{Define a theoretical distribution}
\usage{
assume(x, distribution, df = NULL, ...)
}
\arguments{
\item{x}{The output of \code{\link[=specify]{specify()}} or \code{\link[=hypothesize]{hypothesize()}}, giving the
observed data, variable(s) of interest, and (optionally) null hypothesis.}

\item{distribution}{The distribution in question, as a string. One of
\code{"F"}, \code{"Chisq"}, \code{"t"}, or \code{"z"}.}

\item{df}{Optional. The degrees of freedom parameter(s) for the \code{distribution}
supplied, as a numeric vector. For \code{distribution = "F"}, this should have
length two (e.g. \code{c(10, 3)}). For \code{distribution = "Chisq"} or
\code{distribution = "t"}, this should have length one. For
\code{distribution = "z"}, this argument is not required. The package
will supply a message if the supplied \code{df} argument is different from
recognized values. See the Details section below for more information.}

\item{...}{Currently ignored.}
}
\value{
An infer theoretical distribution that can be passed to helpers
like \code{\link[=visualize]{visualize()}}, \code{\link[=get_p_value]{get_p_value()}}, and \code{\link[=get_confidence_interval]{get_confidence_interval()}}.
}
\description{
This function allows the user to define a null distribution based on
theoretical methods. In many infer pipelines, \code{assume()} can be
used in place of \code{\link[=generate]{generate()}} and \code{\link[=calculate]{calculate()}} to create a null
distribution. Rather than outputting a data frame containing a
distribution of test statistics calculated from resamples of the observed
data, \code{assume()} outputs a more abstract type of object just containing
the distributional details supplied in the \code{distribution} and \code{df} arguments.
However, \code{assume()} output can be passed to \code{\link[=visualize]{visualize()}}, \code{\link[=get_p_value]{get_p_value()}},
and \code{\link[=get_confidence_interval]{get_confidence_interval()}} in the same way that simulation-based
distributions can.

To define a theoretical null distribution (for use in hypothesis testing),
be sure to provide a null hypothesis via \code{\link[=hypothesize]{hypothesize()}}. To define a
theoretical sampling distribution (for use in confidence intervals),
provide the output of \code{\link[=specify]{specify()}}. Sampling distributions (only
implemented for \code{t} and \code{z}) lie on the scale of the data, and will be
recentered and rescaled to match the corresponding \code{stat} given in
\code{\link[=calculate]{calculate()}} to calculate the observed statistic.
}
\details{
Note that the assumption being expressed here, for use in theory-based
inference, only extends to \emph{distributional} assumptions: the null
distribution in question and its parameters. Statistical inference with
infer, whether carried out via simulation (i.e. based on pipelines
using \code{\link[=generate]{generate()}} and \code{\link[=calculate]{calculate()}}) or theory (i.e. with \code{assume()}),
always involves the condition that observations are independent of
each other.

\code{infer} only supports theoretical tests on one or two means via the
\code{t} distribution and one or two proportions via the \code{z}.

For tests comparing two means, if \code{n1} is the group size for one level of
the explanatory variable, and \code{n2} is that for the other level, \code{infer}
will recognize the following degrees of freedom (\code{df}) arguments:
\itemize{
\item \code{min(n1 - 1, n2 - 1)}
\item \code{n1 + n2 - 2}
\item The \code{"parameter"} entry of the analogous \code{stats::t.test()} call
\item The \code{"parameter"} entry of the analogous \code{stats::t.test()} call with \code{var.equal = TRUE}
}

By default, the package will use the \code{"parameter"} entry of the analogous
\code{stats::t.test()} call with \code{var.equal = FALSE} (the default).
}
\examples{
# construct theoretical distributions ---------------------------------

# F distribution
# with the `partyid` explanatory variable
gss |>
  specify(age ~ partyid) |>
  assume(distribution = "F")

# Chi-squared goodness of fit distribution
# on the `finrela` variable
gss |>
  specify(response = finrela) |>
  hypothesize(null = "point",
              p = c("far below average" = 1/6,
                    "below average" = 1/6,
                    "average" = 1/6,
                    "above average" = 1/6,
                    "far above average" = 1/6,
                    "DK" = 1/6)) |>
  assume("Chisq")

# Chi-squared test of independence
# on the `finrela` and `sex` variables
gss |>
  specify(formula = finrela ~ sex) |>
  assume(distribution = "Chisq")

# T distribution
gss |>
  specify(age ~ college) |>
  assume("t")

# Z distribution
gss |>
  specify(response = sex, success = "female") |>
  assume("z")

\dontrun{
# each of these distributions can be passed to infer helper
# functions alongside observed statistics!

# for example, a 1-sample t-test -------------------------------------

# calculate the observed statistic
obs_stat <- gss |>
  specify(response = hours) |>
  hypothesize(null = "point", mu = 40) |>
  calculate(stat = "t")

# construct a null distribution
null_dist <- gss |>
  specify(response = hours) |>
  assume("t")

# juxtapose them visually
visualize(null_dist) +
  shade_p_value(obs_stat, direction = "both")

# calculate a p-value
get_p_value(null_dist, obs_stat, direction = "both")

# or, an F test ------------------------------------------------------

# calculate the observed statistic
obs_stat <- gss |>
  specify(age ~ partyid) |>
  hypothesize(null = "independence") |>
  calculate(stat = "F")

# construct a null distribution
null_dist <- gss |>
  specify(age ~ partyid) |>
  assume(distribution = "F")

# juxtapose them visually
visualize(null_dist) +
  shade_p_value(obs_stat, direction = "both")

# calculate a p-value
get_p_value(null_dist, obs_stat, direction = "both")
}

}


================================================
FILE: man/calculate.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/calculate.R
\name{calculate}
\alias{calculate}
\title{Calculate summary statistics}
\usage{
calculate(
  x,
  stat = c("mean", "median", "sum", "sd", "prop", "count", "diff in means",
    "diff in medians", "diff in props", "Chisq", "F", "slope", "correlation", "t", "z",
    "ratio of props", "odds ratio", "ratio of means"),
  order = NULL,
  ...
)
}
\arguments{
\item{x}{The output from \code{\link[=generate]{generate()}} for computation-based inference or the
output from \code{\link[=hypothesize]{hypothesize()}} piped in to here for theory-based inference.}

\item{stat}{A string giving the type of the statistic to calculate or a
function that takes in a replicate of \code{x} and returns a scalar value. Current
options include \code{"mean"}, \code{"median"}, \code{"sum"}, \code{"sd"}, \code{"prop"}, \code{"count"},
\code{"diff in means"}, \code{"diff in medians"}, \code{"diff in props"}, \code{"Chisq"} (or
\code{"chisq"}), \code{"F"} (or \code{"f"}), \code{"t"}, \code{"z"}, \code{"ratio of props"}, \code{"slope"},
\code{"odds ratio"}, \code{"ratio of means"}, and \code{"correlation"}. \code{infer} only
supports theoretical tests on one or two means via the \code{"t"} distribution
and one or two proportions via the \code{"z"}. See the "Arbitrary test statistics"
section below for more on how to define a custom statistic.}

\item{order}{A string vector of specifying the order in which the levels of
the explanatory variable should be ordered for subtraction (or division
for ratio-based statistics), where \code{order = c("first", "second")} means
\code{("first" - "second")}, or the analogue for ratios. Needed for inference on
difference in means, medians, proportions, ratios, t, and z statistics.}

\item{...}{To pass options like \code{na.rm = TRUE} into functions like
\link[base:mean]{mean()}, \link[stats:sd]{sd()}, etc. Can also be used to
supply hypothesized null values for the \code{"t"} statistic or additional
arguments to \code{\link[stats:chisq.test]{stats::chisq.test()}}.}
}
\value{
A tibble containing a \code{stat} column of calculated statistics.
}
\description{
Given the output of \code{\link[=specify]{specify()}} and/or \code{\link[=hypothesize]{hypothesize()}}, this function will
return the observed statistic specified with the \code{stat} argument. Some test
statistics, such as \code{Chisq}, \code{t}, and \code{z}, require a null hypothesis. If
provided the output of \code{\link[=generate]{generate()}}, the function will calculate the
supplied \code{stat} for each \code{replicate}.

Learn more in \code{vignette("infer")}.
}
\section{Arbitrary test statistics}{


In addition to the pre-implemented statistics documented in \code{stat}, users can
supply an arbitrary test statistic by supplying a function to the \code{stat}
argument.

The function should have arguments \code{stat(x, order, ...)}, where \code{x} is one
replicate's worth of \code{x}. The \code{order} argument and ellipses will be supplied
directly to the \code{stat} function. Internally, \code{calculate()} will split \code{x} up
into data frames by replicate and pass them one-by-one to the supplied \code{stat}.
For example, to implement \code{stat = "mean"} as a function, one could write:

\if{html}{\out{<div class="sourceCode r">}}\preformatted{stat_mean <- function(x, order, ...) \{mean(x$hours)\}
obs_mean <-
  gss \%>\%
  specify(response = hours) \%>\%
  calculate(stat = stat_mean)

set.seed(1)
null_dist_mean <-
  gss \%>\%
  specify(response = hours) \%>\%
  hypothesize(null = "point", mu = 40) \%>\%
  generate(reps = 5, type = "bootstrap") \%>\%
  calculate(stat = stat_mean)
}\if{html}{\out{</div>}}

Note that the same \code{stat_mean} function is supplied to both \code{generate()}d and
non-\code{generate()}d infer objects--no need to implement support for grouping
by \code{replicate} yourself.
}

\section{Missing levels in small samples}{

In some cases, when bootstrapping with small samples, some generated
bootstrap samples will have only one level of the explanatory variable
present. For some test statistics, the calculated statistic in these
cases will be NaN. The package will omit non-finite values from
visualizations (with a warning) and raise an error in p-value calculations.
}

\section{Reproducibility}{
When using the infer package for research, or in other cases when exact
reproducibility is a priority, be sure the set the seed for R’s random
number generator. infer will respect the random seed specified in the
\code{set.seed()} function, returning the same result when \code{generate()}ing
data given an identical seed. For instance, we can calculate the
difference in mean \code{age} by \code{college} degree status using the \code{gss}
dataset from 10 versions of the \code{gss} resampled with permutation using
the following code.

\if{html}{\out{<div class="sourceCode r">}}\preformatted{set.seed(1)

gss |>
  specify(age ~ college) |>
  hypothesize(null = "independence") |>
  generate(reps = 5, type = "permute") |>
  calculate("diff in means", order = c("degree", "no degree"))
}\if{html}{\out{</div>}}

\if{html}{\out{<div class="sourceCode">}}\preformatted{## Response: age (numeric)
## Explanatory: college (factor)
## Null Hypothesis: indepe...
## # A tibble: 5 x 2
##   replicate   stat
##       <int>  <dbl>
## 1         1 -0.531
## 2         2 -2.35 
## 3         3  0.764
## 4         4  0.280
## 5         5  0.350
}\if{html}{\out{</div>}}

Setting the seed to the same value again and rerunning the same code
will produce the same result.

\if{html}{\out{<div class="sourceCode r">}}\preformatted{# set the seed
set.seed(1)

gss |>
  specify(age ~ college) |>
  hypothesize(null = "independence") |>
  generate(reps = 5, type = "permute") |>
  calculate("diff in means", order = c("degree", "no degree"))
}\if{html}{\out{</div>}}

\if{html}{\out{<div class="sourceCode">}}\preformatted{## Response: age (numeric)
## Explanatory: college (factor)
## Null Hypothesis: indepe...
## # A tibble: 5 x 2
##   replicate   stat
##       <int>  <dbl>
## 1         1 -0.531
## 2         2 -2.35 
## 3         3  0.764
## 4         4  0.280
## 5         5  0.350
}\if{html}{\out{</div>}}

Please keep this in mind when writing infer code that utilizes
resampling with \code{generate()}.
}

\examples{

# calculate a null distribution of hours worked per week under
# the null hypothesis that the mean is 40
gss |>
  specify(response = hours) |>
  hypothesize(null = "point", mu = 40) |>
  generate(reps = 200, type = "bootstrap") |>
  calculate(stat = "mean")

# calculate the corresponding observed statistic
gss |>
  specify(response = hours) |>
  calculate(stat = "mean")

# calculate a null distribution assuming independence between age
# of respondent and whether they have a college degree
gss |>
  specify(age ~ college) |>
  hypothesize(null = "independence") |>
  generate(reps = 200, type = "permute") |>
  calculate("diff in means", order = c("degree", "no degree"))

# calculate the corresponding observed statistic
gss |>
  specify(age ~ college) |>
  calculate("diff in means", order = c("degree", "no degree"))

# some statistics require a null hypothesis
 gss |>
   specify(response = hours) |>
   hypothesize(null = "point", mu = 40) |>
   calculate(stat = "t")

# more in-depth explanation of how to use the infer package
\dontrun{
vignette("infer")
}

}
\seealso{
\code{\link[=visualize]{visualize()}}, \code{\link[=get_p_value]{get_p_value()}}, and \code{\link[=get_confidence_interval]{get_confidence_interval()}}
to extract value from this function's outputs.

Other core functions: 
\code{\link{generate}()},
\code{\link{hypothesize}()},
\code{\link{specify}()}
}
\concept{core functions}


================================================
FILE: man/chisq_stat.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/wrappers.R
\name{chisq_stat}
\alias{chisq_stat}
\title{Tidy chi-squared test statistic}
\usage{
chisq_stat(x, formula, response = NULL, explanatory = NULL, ...)
}
\arguments{
\item{x}{A data frame that can be coerced into a \link[tibble:tibble]{tibble}.}

\item{formula}{A formula with the response variable on the left and the
explanatory on the right. Alternatively, a \code{response} and \code{explanatory}
argument can be supplied.}

\item{response}{The variable name in \code{x} that will serve as the response.
This is an alternative to using the \code{formula} argument.}

\item{explanatory}{The variable name in \code{x} that will serve as the
explanatory variable. This is an alternative to using the formula argument.}

\item{...}{Additional arguments for \link[stats:chisq.test]{chisq.test()}.}
}
\description{
@description
}
\details{
A shortcut wrapper function to get the observed test statistic for a chisq
test. Uses \link[stats:chisq.test]{chisq.test()}, which applies a continuity
correction. This function has been deprecated in favor of the more
general \code{\link[=observe]{observe()}}.
}
\examples{
# chi-squared test statistic for test of independence
# of college completion status depending and one's
# self-identified income class
chisq_stat(gss, college ~ finrela)

# chi-squared test statistic for a goodness of fit
# test on whether self-identified income class
# follows a uniform distribution
chisq_stat(gss,
           response = finrela,
           p = c("far below average" = 1/6,
                 "below average" = 1/6,
                 "average" = 1/6,
                 "above average" = 1/6,
                 "far above average" = 1/6,
                 "DK" = 1/6))

}
\seealso{
Other wrapper functions: 
\code{\link{chisq_test}()},
\code{\link{observe}()},
\code{\link{prop_test}()},
\code{\link{t_stat}()},
\code{\link{t_test}()}

Other functions for calculating observed statistics: 
\code{\link{observe}()},
\code{\link{t_stat}()}
}
\concept{functions for calculating observed statistics}
\concept{wrapper functions}


================================================
FILE: man/chisq_test.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/wrappers.R
\name{chisq_test}
\alias{chisq_test}
\title{Tidy chi-squared test}
\usage{
chisq_test(x, formula, response = NULL, explanatory = NULL, ...)
}
\arguments{
\item{x}{A data frame that can be coerced into a \link[tibble:tibble]{tibble}.}

\item{formula}{A formula with the response variable on the left and the
explanatory on the right. Alternatively, a \code{response} and \code{explanatory}
argument can be supplied.}

\item{response}{The variable name in \code{x} that will serve as the response.
This is an alternative to using the \code{formula} argument.}

\item{explanatory}{The variable name in \code{x} that will serve as the
explanatory variable. This is an alternative to using the formula argument.}

\item{...}{Additional arguments for \link[stats:chisq.test]{chisq.test()}.}
}
\description{
A tidier version of \link[stats:chisq.test]{chisq.test()} for goodness of fit
tests and tests of independence.
}
\examples{
# chi-squared test of independence for college completion
# status depending on one's self-identified income class
chisq_test(gss, college ~ finrela)

# chi-squared goodness of fit test on whether self-identified
# income class follows a uniform distribution
chisq_test(gss,
           response = finrela,
           p = c("far below average" = 1/6,
                 "below average" = 1/6,
                 "average" = 1/6,
                 "above average" = 1/6,
                 "far above average" = 1/6,
                 "DK" = 1/6))

}
\seealso{
Other wrapper functions: 
\code{\link{chisq_stat}()},
\code{\link{observe}()},
\code{\link{prop_test}()},
\code{\link{t_stat}()},
\code{\link{t_test}()}
}
\concept{wrapper functions}


================================================
FILE: man/deprecated.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/deprecated.R
\name{deprecated}
\alias{deprecated}
\alias{conf_int}
\alias{p_value}
\title{Deprecated functions and objects}
\usage{
conf_int(x, level = 0.95, type = "percentile", point_estimate = NULL)

p_value(x, obs_stat, direction)
}
\arguments{
\item{x}{See the non-deprecated function.}

\item{level}{See the non-deprecated function.}

\item{type}{See the non-deprecated function.}

\item{point_estimate}{See the non-deprecated function.}

\item{obs_stat}{See the non-deprecated function.}

\item{direction}{See the non-deprecated function.}
}
\description{
These functions and objects should no longer be used. They will be removed
in a future release of infer.
}
\seealso{
\code{\link[=get_p_value]{get_p_value()}}, \code{\link[=get_confidence_interval]{get_confidence_interval()}}, \code{\link[=generate]{generate()}}
}


================================================
FILE: man/fit.infer.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/fit.R
\name{fit.infer}
\alias{fit.infer}
\title{Fit linear models to infer objects}
\usage{
\method{fit}{infer}(object, ...)
}
\arguments{
\item{object}{Output from an infer function---likely \code{\link[=generate]{generate()}} or
\code{\link[=specify]{specify()}}---which specifies the formula and data to fit a model to.}

\item{...}{Any optional arguments to pass along to the model fitting
function. See \code{\link[stats:glm]{stats::glm()}} for more information.}
}
\value{
A \link[tibble:tibble]{tibble} containing the following columns:

\itemize{
\item \code{replicate}: Only supplied if the input object had been previously
passed to \code{\link[=generate]{generate()}}. A number corresponding to which resample of the
original data set the model was fitted to.
\item \code{term}: The explanatory variable (or intercept) in question.
\item \code{estimate}: The model coefficient for the given resample (\code{replicate}) and
explanatory variable (\code{term}).
}
}
\description{
Given the output of an infer core function, this function will fit
a linear model using \code{\link[stats:glm]{stats::glm()}} according to the formula and data supplied
earlier in the pipeline. If passed the output of \code{\link[=specify]{specify()}} or
\code{\link[=hypothesize]{hypothesize()}}, the function will fit one model. If passed the output
of \code{\link[=generate]{generate()}}, it will fit a model to each data resample, denoted in
the \code{replicate} column. The family of the fitted model depends on the type
of the response variable. If the response is numeric, \code{fit()} will use
\code{family = "gaussian"} (linear regression). If the response is a 2-level
factor or character, \code{fit()} will use \code{family = "binomial"} (logistic
regression). To fit character or factor response variables with more than
two levels, we recommend \code{\link[parsnip:multinom_reg]{parsnip::multinom_reg()}}.

infer provides a fit "method" for infer objects, which is a way of carrying
out model fitting as applied to infer output. The "generic," imported from
the generics package and re-exported from this package, provides the
general form of \code{fit()} that points to infer's method when called on an
infer object. That generic is also documented here.

Learn more in \code{vignette("infer")}.
}
\details{
Randomization-based statistical inference with multiple explanatory
variables requires careful consideration of the null hypothesis in question
and its implications for permutation procedures. Inference for partial
regression coefficients via the permutation method implemented in
\code{\link[=generate]{generate()}} for multiple explanatory variables, consistent with its meaning
elsewhere in the package, is subject to additional distributional assumptions
beyond those required for one explanatory variable. Namely, the distribution
of the response variable must be similar to the distribution of the errors
under the null hypothesis' specification of a fixed effect of the explanatory
variables. (This null hypothesis is reflected in the \code{variables} argument to
\code{\link[=generate]{generate()}}. By default, all of the explanatory variables are treated
as fixed.) A general rule of thumb here is, if there are large outliers
in the distributions of any of the explanatory variables, this distributional
assumption will not be satisfied; when the response variable is permuted,
the (presumably outlying) value of the response will no longer be paired
with the outlier in the explanatory variable, causing an outsize effect
on the resulting slope coefficient for that explanatory variable.

More sophisticated methods that are outside of the scope of this package
requiring fewer---or less strict---distributional assumptions
exist. For an overview, see "Permutation tests for univariate or
multivariate analysis of variance and regression" (Marti J. Anderson,
2001), \doi{10.1139/cjfas-58-3-626}.
}
\section{Reproducibility}{
When using the infer package for research, or in other cases when exact
reproducibility is a priority, be sure the set the seed for R’s random
number generator. infer will respect the random seed specified in the
\code{set.seed()} function, returning the same result when \code{generate()}ing
data given an identical seed. For instance, we can calculate the
difference in mean \code{age} by \code{college} degree status using the \code{gss}
dataset from 10 versions of the \code{gss} resampled with permutation using
the following code.

\if{html}{\out{<div class="sourceCode r">}}\preformatted{set.seed(1)

gss |>
  specify(age ~ college) |>
  hypothesize(null = "independence") |>
  generate(reps = 5, type = "permute") |>
  calculate("diff in means", order = c("degree", "no degree"))
}\if{html}{\out{</div>}}

\if{html}{\out{<div class="sourceCode">}}\preformatted{## Response: age (numeric)
## Explanatory: college (factor)
## Null Hypothesis: indepe...
## # A tibble: 5 x 2
##   replicate   stat
##       <int>  <dbl>
## 1         1 -0.531
## 2         2 -2.35 
## 3         3  0.764
## 4         4  0.280
## 5         5  0.350
}\if{html}{\out{</div>}}

Setting the seed to the same value again and rerunning the same code
will produce the same result.

\if{html}{\out{<div class="sourceCode r">}}\preformatted{# set the seed
set.seed(1)

gss |>
  specify(age ~ college) |>
  hypothesize(null = "independence") |>
  generate(reps = 5, type = "permute") |>
  calculate("diff in means", order = c("degree", "no degree"))
}\if{html}{\out{</div>}}

\if{html}{\out{<div class="sourceCode">}}\preformatted{## Response: age (numeric)
## Explanatory: college (factor)
## Null Hypothesis: indepe...
## # A tibble: 5 x 2
##   replicate   stat
##       <int>  <dbl>
## 1         1 -0.531
## 2         2 -2.35 
## 3         3  0.764
## 4         4  0.280
## 5         5  0.350
}\if{html}{\out{</div>}}

Please keep this in mind when writing infer code that utilizes
resampling with \code{generate()}.
}

\examples{
# fit a linear model predicting number of hours worked per
# week using respondent age and degree status.
observed_fit <- gss |>
  specify(hours ~ age + college) |>
  fit()

observed_fit

# fit 100 models to resamples of the gss dataset, where the response
# `hours` is permuted in each. note that this code is the same as
# the above except for the addition of the `generate` step.
null_fits <- gss |>
  specify(hours ~ age + college) |>
  hypothesize(null = "independence") |>
  generate(reps = 100, type = "permute") |>
  fit()

null_fits

# for logistic regression, just supply a binary response variable!
# (this can also be made explicit via the `family` argument in ...)
gss |>
  specify(college ~ age + hours) |>
  fit()

# more in-depth explanation of how to use the infer package
\dontrun{
vignette("infer")
}

}


================================================
FILE: man/generate.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/generate.R
\name{generate}
\alias{generate}
\title{Generate resamples, permutations, or simulations}
\usage{
generate(x, reps = 1, type = NULL, variables = !!response_expr(x), ...)
}
\arguments{
\item{x}{A data frame that can be coerced into a \link[tibble:tibble]{tibble}.}

\item{reps}{The number of resamples to generate.}

\item{type}{The method used to generate resamples of the observed
data reflecting the null hypothesis. Currently one of
\code{"bootstrap"}, \code{"permute"}, or \code{"draw"} (see below).}

\item{variables}{If \code{type = "permute"}, a set of unquoted column names in the
data to permute (independently of each other). Defaults to only the
response variable. Note that any derived effects that depend on these
columns (e.g., interaction effects) will also be affected.}

\item{...}{Currently ignored.}
}
\value{
A tibble containing \code{reps} generated datasets, indicated by the
\code{replicate} column.
}
\description{
Generation creates a simulated distribution from \code{specify()}.
In the context of confidence intervals, this is a bootstrap distribution
based on the result of \code{specify()}. In the context of hypothesis testing,
this is a null distribution based on the result of \code{specify()} and
\verb{hypothesize().}

Learn more in \code{vignette("infer")}.
}
\section{Generation Types}{


The \code{type} argument determines the method used to create the null
distribution.

\itemize{
\item \code{bootstrap}: A bootstrap sample will be drawn for each replicate,
where a sample of size equal to the input sample size is drawn (with
replacement) from the input sample data.
\item \code{permute}: For each replicate, each input value will be randomly
reassigned (without replacement) to a new output value in the sample.
\item \code{draw}: A value will be sampled from a theoretical distribution
with parameter \code{p} specified in \code{\link[=hypothesize]{hypothesize()}} for each replicate. This
option is currently only applicable for testing on one proportion. This
generation type was previously called \code{"simulate"}, which has been
superseded.
}
}

\section{Reproducibility}{
When using the infer package for research, or in other cases when exact
reproducibility is a priority, be sure the set the seed for R’s random
number generator. infer will respect the random seed specified in the
\code{set.seed()} function, returning the same result when \code{generate()}ing
data given an identical seed. For instance, we can calculate the
difference in mean \code{age} by \code{college} degree status using the \code{gss}
dataset from 10 versions of the \code{gss} resampled with permutation using
the following code.

\if{html}{\out{<div class="sourceCode r">}}\preformatted{set.seed(1)

gss |>
  specify(age ~ college) |>
  hypothesize(null = "independence") |>
  generate(reps = 5, type = "permute") |>
  calculate("diff in means", order = c("degree", "no degree"))
}\if{html}{\out{</div>}}

\if{html}{\out{<div class="sourceCode">}}\preformatted{## Response: age (numeric)
## Explanatory: college (factor)
## Null Hypothesis: indepe...
## # A tibble: 5 x 2
##   replicate   stat
##       <int>  <dbl>
## 1         1 -0.531
## 2         2 -2.35 
## 3         3  0.764
## 4         4  0.280
## 5         5  0.350
}\if{html}{\out{</div>}}

Setting the seed to the same value again and rerunning the same code
will produce the same result.

\if{html}{\out{<div class="sourceCode r">}}\preformatted{# set the seed
set.seed(1)

gss |>
  specify(age ~ college) |>
  hypothesize(null = "independence") |>
  generate(reps = 5, type = "permute") |>
  calculate("diff in means", order = c("degree", "no degree"))
}\if{html}{\out{</div>}}

\if{html}{\out{<div class="sourceCode">}}\preformatted{## Response: age (numeric)
## Explanatory: college (factor)
## Null Hypothesis: indepe...
## # A tibble: 5 x 2
##   replicate   stat
##       <int>  <dbl>
## 1         1 -0.531
## 2         2 -2.35 
## 3         3  0.764
## 4         4  0.280
## 5         5  0.350
}\if{html}{\out{</div>}}

Please keep this in mind when writing infer code that utilizes
resampling with \code{generate()}.
}

\examples{
# generate a null distribution by taking 200 bootstrap samples
gss |>
 specify(response = hours) |>
 hypothesize(null = "point", mu = 40) |>
 generate(reps = 200, type = "bootstrap")

# generate a null distribution for the independence of
# two variables by permuting their values 200 times
gss |>
 specify(partyid ~ age) |>
 hypothesize(null = "independence") |>
 generate(reps = 200, type = "permute")

# generate a null distribution via sampling from a
# binomial distribution 200 times
gss |>
specify(response = sex, success = "female") |>
  hypothesize(null = "point", p = .5) |>
  generate(reps = 200, type = "draw") |>
  calculate(stat = "z")

# more in-depth explanation of how to use the infer package
\dontrun{
vignette("infer")
}

}
\seealso{
Other core functions: 
\code{\link{calculate}()},
\code{\link{hypothesize}()},
\code{\link{specify}()}
}
\concept{core functions}


================================================
FILE: man/get_confidence_interval.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get_confidence_interval.R
\name{get_confidence_interval}
\alias{get_confidence_interval}
\alias{get_ci}
\title{Compute confidence interval}
\usage{
get_confidence_interval(x, level = 0.95, type = NULL, point_estimate = NULL)

get_ci(x, level = 0.95, type = NULL, point_estimate = NULL)
}
\arguments{
\item{x}{A distribution. For simulation-based inference, a data frame
containing a distribution of \code{\link[=calculate]{calculate()}}d statistics
or \code{\link[=fit.infer]{fit()}}ted coefficient estimates. This object should
have been passed to \code{\link[=generate]{generate()}} before being supplied or
\code{\link[=calculate]{calculate()}} to \code{\link[=fit.infer]{fit()}}. For theory-based inference,
output of \code{\link[=assume]{assume()}}. Distributions for confidence intervals do not
require a null hypothesis via \code{\link[=hypothesize]{hypothesize()}}.}

\item{level}{A numerical value between 0 and 1 giving the confidence level.
Default value is 0.95.}

\item{type}{A string giving which method should be used for creating the
confidence interval. The default is \code{"percentile"} with \code{"se"}
corresponding to (multiplier * standard error) and \code{"bias-corrected"} for
bias-corrected interval as other options.}

\item{point_estimate}{A data frame containing the observed statistic (in a
\code{\link[=calculate]{calculate()}}-based workflow) or observed fit (in a
\code{\link[=fit.infer]{fit()}}-based workflow). This object is likely the output
of \code{\link[=calculate]{calculate()}} or \code{\link[=fit.infer]{fit()}} and need not
to have been passed to \code{\link[=generate]{generate()}}. Set to \code{NULL} by
default. Must be provided if \code{type} is \code{"se"} or \code{"bias-corrected"}.}
}
\value{
A \link[tibble:tibble]{tibble} containing the following columns:

\itemize{
\item \code{term}: The explanatory variable (or intercept) in question. Only
supplied if the input had been previously passed to \code{\link[=fit.infer]{fit()}}.
\item \code{lower_ci}, \code{upper_ci}: The lower and upper bounds of the confidence
interval, respectively.
}
}
\description{
Compute a confidence interval around a summary statistic. Both
simulation-based and theoretical methods are supported, though only
\code{type = "se"} is supported for theoretical methods.

Learn more in \code{vignette("infer")}.
}
\details{
A null hypothesis is not required to compute a confidence interval. However,
including \code{\link[=hypothesize]{hypothesize()}} in a pipeline leading to \code{get_confidence_interval()}
will not break anything. This can be useful when computing a confidence
interval using the same distribution used to compute a p-value.

Theoretical confidence intervals (i.e. calculated by supplying the output
of \code{\link[=assume]{assume()}} to the \code{x} argument) require that the point estimate lies on
the scale of the data. The distribution defined in \code{\link[=assume]{assume()}} will be
recentered and rescaled to align with the point estimate, as can be shown
in the output of \code{\link[=visualize]{visualize()}} when paired with \code{\link[=shade_confidence_interval]{shade_confidence_interval()}}.
Confidence intervals are implemented for the following distributions and
point estimates:

\itemize{
\item \code{distribution = "t"}: \code{point_estimate} should be the output of
\code{\link[=calculate]{calculate()}} with \code{stat = "mean"} or \code{stat = "diff in means"}
\item \code{distribution = "z"}: \code{point_estimate} should be the output of
\code{\link[=calculate]{calculate()}} with \code{stat = "prop"} or \code{stat = "diff in props"}
}
}
\section{Aliases}{

\code{get_ci()} is an alias of \code{get_confidence_interval()}.
\code{conf_int()} is a deprecated alias of \code{get_confidence_interval()}.
}

\examples{

boot_dist <- gss |>
  # We're interested in the number of hours worked per week
  specify(response = hours) |>
  # Generate bootstrap samples
  generate(reps = 1000, type = "bootstrap") |>
  # Calculate mean of each bootstrap sample
  calculate(stat = "mean")

boot_dist |>
  # Calculate the confidence interval around the point estimate
  get_confidence_interval(
    # At the 95\% confidence level; percentile method
    level = 0.95
  )

# for type = "se" or type = "bias-corrected" we need a point estimate
sample_mean <- gss |>
  specify(response = hours) |>
  calculate(stat = "mean")

boot_dist |>
  get_confidence_interval(
    point_estimate = sample_mean,
    # At the 95\% confidence level
    level = 0.95,
    # Using the standard error method
    type = "se"
  )

# using a theoretical distribution -----------------------------------

# define a sampling distribution
sampling_dist <- gss |>
  specify(response = hours) |>
  assume("t")

# get the confidence interval---note that the
# point estimate is required here
get_confidence_interval(
  sampling_dist,
  level = .95,
  point_estimate = sample_mean
)

# using a model fitting workflow -----------------------

# fit a linear model predicting number of hours worked per
# week using respondent age and degree status.
observed_fit <- gss |>
  specify(hours ~ age + college) |>
  fit()

observed_fit

# fit 100 models to resamples of the gss dataset, where the response
# `hours` is permuted in each. note that this code is the same as
# the above except for the addition of the `generate` step.
null_fits <- gss |>
  specify(hours ~ age + college) |>
  hypothesize(null = "independence") |>
  generate(reps = 100, type = "permute") |>
  fit()

null_fits

get_confidence_interval(
  null_fits,
  point_estimate = observed_fit,
  level = .95
)

# more in-depth explanation of how to use the infer package
\dontrun{
vignette("infer")
}

}
\seealso{
Other auxillary functions: 
\code{\link{get_p_value}()}
}
\concept{auxillary functions}


================================================
FILE: man/get_p_value.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get_p_value.R
\name{get_p_value}
\alias{get_p_value}
\alias{get_p_value.default}
\alias{get_pvalue}
\alias{get_p_value.infer_dist}
\title{Compute p-value}
\usage{
get_p_value(x, obs_stat, direction)

\method{get_p_value}{default}(x, obs_stat, direction)

get_pvalue(x, obs_stat, direction)

\method{get_p_value}{infer_dist}(x, obs_stat, direction)
}
\arguments{
\item{x}{A null distribution. For simulation-based inference, a data frame
containing a distribution of \code{\link[=calculate]{calculate()}}d statistics
or \code{\link[=fit.infer]{fit()}}ted coefficient estimates. This object should
have been passed to \code{\link[=generate]{generate()}} before being supplied or
\code{\link[=calculate]{calculate()}} to \code{\link[=fit.infer]{fit()}}. For theory-based inference,
the output of \code{\link[=assume]{assume()}}.}

\item{obs_stat}{A data frame containing the observed statistic (in a
\code{\link[=calculate]{calculate()}}-based workflow) or observed fit (in a
\code{\link[=fit.infer]{fit()}}-based workflow). This object is likely the output
of \code{\link[=calculate]{calculate()}} or \code{\link[=fit.infer]{fit()}} and need not
to have been passed to \code{\link[=generate]{generate()}}.}

\item{direction}{A character string. Options are \code{"less"}, \code{"greater"}, or
\code{"two-sided"}. Can also use \code{"left"}, \code{"right"}, \code{"both"},
\code{"two_sided"}, or \code{"two sided"}, \code{"two.sided"}.}
}
\value{
A \link[tibble:tibble]{tibble} containing the following columns:

\itemize{
\item \code{term}: The explanatory variable (or intercept) in question. Only
supplied if the input had been previously passed to \code{\link[=fit.infer]{fit()}}.
\item \code{p_value}: A value in [0, 1] giving the probability that a
statistic/coefficient as or more extreme than the observed
statistic/coefficient would occur if the null hypothesis were true.
}
}
\description{
Compute a p-value from a null distribution and observed statistic.

Learn more in \code{vignette("infer")}.
}
\section{Aliases}{

\code{get_pvalue()} is an alias of \code{get_p_value()}.
\code{p_value} is a deprecated alias of \code{get_p_value()}.
}

\section{Zero p-value}{

Though a true p-value of 0 is impossible, \code{get_p_value()} may return 0 in
some cases. This is due to the simulation-based nature of the \{infer\}
package; the output of this function is an approximation based on
the number of \code{reps} chosen in the \code{generate()} step. When the observed
statistic is very unlikely given the null hypothesis, and only a small
number of \code{reps} have been generated to form a null distribution,
it is possible that the observed statistic will be more extreme than
every test statistic generated to form the null distribution, resulting
in an approximate p-value of 0. In this case, the true p-value is a small
value likely less than \code{3/reps} (based on a poisson approximation).

In the case that a p-value of zero is reported, a warning message will be
raised to caution the user against reporting a p-value exactly equal to 0.
}

\examples{

# using a simulation-based null distribution ------------------------------

# find the point estimate---mean number of hours worked per week
point_estimate <- gss |>
  specify(response = hours) |>
  calculate(stat = "mean")

# starting with the gss dataset
gss |>
  # ...we're interested in the number of hours worked per week
  specify(response = hours) |>
  # hypothesizing that the mean is 40
  hypothesize(null = "point", mu = 40) |>
  # generating data points for a null distribution
  generate(reps = 1000, type = "bootstrap") |>
  # finding the null distribution
  calculate(stat = "mean") |>
  get_p_value(obs_stat = point_estimate, direction = "two-sided")

# using a theoretical null distribution -----------------------------------

# calculate the observed statistic
obs_stat <- gss |>
  specify(response = hours) |>
  hypothesize(null = "point", mu = 40) |>
  calculate(stat = "t")

# define a null distribution
null_dist <- gss |>
  specify(response = hours) |>
  assume("t")

# calculate a p-value
get_p_value(null_dist, obs_stat, direction = "both")

# using a model fitting workflow -----------------------------------------

# fit a linear model predicting number of hours worked per
# week using respondent age and degree status.
observed_fit <- gss |>
  specify(hours ~ age + college) |>
  fit()

observed_fit

# fit 100 models to resamples of the gss dataset, where the response
# `hours` is permuted in each. note that this code is the same as
# the above except for the addition of the `generate` step.
null_fits <- gss |>
  specify(hours ~ age + college) |>
  hypothesize(null = "independence") |>
  generate(reps = 100, type = "permute") |>
  fit()

null_fits

get_p_value(null_fits, obs_stat = observed_fit, direction = "two-sided")

# more in-depth explanation of how to use the infer package
\dontrun{
vignette("infer")
}

}
\seealso{
Other auxillary functions: 
\code{\link{get_confidence_interval}()}
}
\concept{auxillary functions}


================================================
FILE: man/gss.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gss.R
\docType{data}
\name{gss}
\alias{gss}
\title{Subset of data from the General Social Survey (GSS).}
\format{
A tibble with 500 rows and 11 variables:
\describe{
\item{year}{year respondent was surveyed}
\item{age}{age at time of survey, truncated at 89}
\item{sex}{respondent's sex (self-identified)}
\item{college}{whether on not respondent has a college degree, including
junior/community college}
\item{partyid}{political party affiliation}
\item{hompop}{number of persons in household}
\item{hours}{number of hours worked in week before survey, truncated at 89}
\item{income}{total family income}
\item{class}{subjective socioeconomic class identification}
\item{finrela}{opinion of family income}
\item{weight}{survey weight}
}
}
\source{
\url{https://gss.norc.org}
}
\usage{
gss
}
\description{
The General Social Survey is a high-quality survey which gathers data on
American society and opinions, conducted since 1972. This data set is a
sample of 500 entries from the GSS, spanning years 1973-2018,
including demographic markers and some
economic variables. Note that this data is included for demonstration only,
and should not be assumed to provide accurate estimates relating to the GSS.
However, due to the high quality of the GSS, the unweighted data will
approximate the weighted data in some analyses.
}
\keyword{datasets}


================================================
FILE: man/hypothesize.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/hypothesize.R
\name{hypothesize}
\alias{hypothesize}
\alias{hypothesise}
\title{Declare a null hypothesis}
\usage{
hypothesize(x, null, p = NULL, mu = NULL, med = NULL, sigma = NULL)

hypothesise(x, null, p = NULL, mu = NULL, med = NULL, sigma = NULL)
}
\arguments{
\item{x}{A data frame that can be coerced into a \link[tibble:tibble]{tibble}.}

\item{null}{The null hypothesis. Options include \code{"independence"},
\code{"point"}, and \code{"paired independence"}.
\itemize{
\item \code{independence}: Should be used with both a \code{response} and \code{explanatory}
variable. Indicates that the values of the specified \code{response} variable
are independent of the associated values in \code{explanatory}.
\item \code{point}: Should be used with only a \code{response} variable. Indicates
that a point estimate based on the values in \code{response} is associated
with a parameter. Sometimes requires supplying one of \code{p}, \code{mu}, \code{med}, or
\code{sigma}.
\item \verb{paired independence}: Should be used with only a \code{response} variable
giving the pre-computed difference between paired observations. Indicates
that the order of subtraction between paired values does not affect the
resulting distribution.
}}

\item{p}{The true proportion of successes (a number between 0 and 1). To be used with point null hypotheses when the specified response
variable is categorical.}

\item{mu}{The true mean (any numerical value). To be used with point null
hypotheses when the specified response variable is continuous.}

\item{med}{The true median (any numerical value). To be used with point null
hypotheses when the specified response variable is continuous.}

\item{sigma}{The true standard deviation (any numerical value). To be used with
point null hypotheses.}
}
\value{
A tibble containing the response (and explanatory, if specified)
variable data with parameter information stored as well.
}
\description{
Declare a null hypothesis about variables selected in \code{\link[=specify]{specify()}}.

Learn more in \code{vignette("infer")}.
}
\examples{
# hypothesize independence of two variables
gss |>
 specify(college ~ partyid, success = "degree") |>
 hypothesize(null = "independence")

# hypothesize a mean number of hours worked per week of 40
gss |>
  specify(response = hours) |>
  hypothesize(null = "point", mu = 40)

# more in-depth explanation of how to use the infer package
\dontrun{
vignette("infer")
}

}
\seealso{
Other core functions: 
\code{\link{calculate}()},
\code{\link{generate}()},
\code{\link{specify}()}
}
\concept{core functions}


================================================
FILE: man/infer.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/infer.R
\docType{package}
\name{infer}
\alias{infer-package}
\alias{infer}
\title{infer: a grammar for statistical inference}
\description{
The objective of this package is to perform statistical inference using a
grammar that illustrates the underlying concepts and a format that coheres
with the tidyverse.
}
\details{
For an overview of how to use the core functionality, see \code{vignette("infer")}
}
\seealso{
Useful links:
\itemize{
  \item \url{https://github.com/tidymodels/infer}
  \item \url{https://infer.tidymodels.org/}
  \item Report bugs at \url{https://github.com/tidymodels/infer/issues}
}

}
\author{
\strong{Maintainer}: Simon Couch \email{simon.couch@posit.co} (\href{https://orcid.org/0000-0001-5676-5107}{ORCID})

Authors:
\itemize{
  \item Andrew Bray \email{abray@reed.edu}
  \item Chester Ismay \email{chester.ismay@gmail.com} (\href{https://orcid.org/0000-0003-2820-2547}{ORCID})
  \item Evgeni Chasnovski \email{evgeni.chasnovski@gmail.com} (\href{https://orcid.org/0000-0002-1617-4019}{ORCID})
  \item Ben Baumer \email{ben.baumer@gmail.com} (\href{https://orcid.org/0000-0002-3279-0516}{ORCID})
  \item Mine Cetinkaya-Rundel \email{mine@stat.duke.edu} (\href{https://orcid.org/0000-0001-6452-2420}{ORCID})
}

Other contributors:
\itemize{
  \item Ted Laderas \email{tedladeras@gmail.com} (\href{https://orcid.org/0000-0002-6207-7068}{ORCID}) [contributor]
  \item Nick Solomon \email{nick.solomon@datacamp.com} [contributor]
  \item Johanna Hardin \email{Jo.Hardin@pomona.edu} [contributor]
  \item Albert Y. Kim \email{albert.ys.kim@gmail.com} (\href{https://orcid.org/0000-0001-7824-306X}{ORCID}) [contributor]
  \item Neal Fultz \email{nfultz@gmail.com} [contributor]
  \item Doug Friedman \email{doug.nhp@gmail.com} [contributor]
  \item Richie Cotton \email{richie@datacamp.com} (\href{https://orcid.org/0000-0003-2504-802X}{ORCID}) [contributor]
  \item Brian Fannin \email{captain@pirategrunt.com} [contributor]
}

}


================================================
FILE: man/observe.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/observe.R
\name{observe}
\alias{observe}
\title{Calculate observed statistics}
\usage{
observe(
  x,
  formula,
  response = NULL,
  explanatory = NULL,
  success = NULL,
  null = NULL,
  p = NULL,
  mu = NULL,
  med = NULL,
  sigma = NULL,
  stat = c("mean", "median", "sum", "sd", "prop", "count", "diff in means",
    "diff in medians", "diff in props", "Chisq", "F", "slope", "correlation", "t", "z",
    "ratio of props", "odds ratio"),
  order = NULL,
  ...
)
}
\arguments{
\item{x}{A data frame that can be coerced into a \link[tibble:tibble]{tibble}.}

\item{formula}{A formula with the response variable on the left and the
explanatory on the right. Alternatively, a \code{response} and \code{explanatory}
argument can be supplied.}

\item{response}{The variable name in \code{x} that will serve as the response.
This is an alternative to using the \code{formula} argument.}

\item{explanatory}{The variable name in \code{x} that will serve as the
explanatory variable. This is an alternative to using the formula argument.}

\item{success}{The level of \code{response} that will be considered a success, as
a string. Needed for inference on one proportion, a difference in
proportions, and corresponding z stats.}

\item{null}{The null hypothesis. Options include \code{"independence"},
\code{"point"}, and \code{"paired independence"}.
\itemize{
\item \code{independence}: Should be used with both a \code{response} and \code{explanatory}
variable. Indicates that the values of the specified \code{response} variable
are independent of the associated values in \code{explanatory}.
\item \code{point}: Should be used with only a \code{response} variable. Indicates
that a point estimate based on the values in \code{response} is associated
with a parameter. Sometimes requires supplying one of \code{p}, \code{mu}, \code{med}, or
\code{sigma}.
\item \verb{paired independence}: Should be used with only a \code{response} variable
giving the pre-computed difference between paired observations. Indicates
that the order of subtraction between paired values does not affect the
resulting distribution.
}}

\item{p}{The true proportion of successes (a number between 0 and 1). To be used with point null hypotheses when the specified response
variable is categorical.}

\item{mu}{The true mean (any numerical value). To be used with point null
hypotheses when the specified response variable is continuous.}

\item{med}{The true median (any numerical value). To be used with point null
hypotheses when the specified response variable is continuous.}

\item{sigma}{The true standard deviation (any numerical value). To be used with
point null hypotheses.}

\item{stat}{A string giving the type of the statistic to calculate or a
function that takes in a replicate of \code{x} and returns a scalar value. Current
options include \code{"mean"}, \code{"median"}, \code{"sum"}, \code{"sd"}, \code{"prop"}, \code{"count"},
\code{"diff in means"}, \code{"diff in medians"}, \code{"diff in props"}, \code{"Chisq"} (or
\code{"chisq"}), \code{"F"} (or \code{"f"}), \code{"t"}, \code{"z"}, \code{"ratio of props"}, \code{"slope"},
\code{"odds ratio"}, \code{"ratio of means"}, and \code{"correlation"}. \code{infer} only
supports theoretical tests on one or two means via the \code{"t"} distribution
and one or two proportions via the \code{"z"}. See the "Arbitrary test statistics"
section below for more on how to define a custom statistic.}

\item{order}{A string vector of specifying the order in which the levels of
the explanatory variable should be ordered for subtraction (or division
for ratio-based statistics), where \code{order = c("first", "second")} means
\code{("first" - "second")}, or the analogue for ratios. Needed for inference on
difference in means, medians, proportions, ratios, t, and z statistics.}

\item{...}{To pass options like \code{na.rm = TRUE} into functions like
\link[base:mean]{mean()}, \link[stats:sd]{sd()}, etc. Can also be used to
supply hypothesized null values for the \code{"t"} statistic or additional
arguments to \code{\link[stats:chisq.test]{stats::chisq.test()}}.}
}
\value{
A 1-column tibble containing the calculated statistic \code{stat}.
}
\description{
This function is a wrapper that calls \code{\link[=specify]{specify()}}, \code{\link[=hypothesize]{hypothesize()}}, and
\code{\link[=calculate]{calculate()}} consecutively that can be used to calculate observed
statistics from data. \code{\link[=hypothesize]{hypothesize()}} will only be called if a point
null hypothesis parameter is supplied.

Learn more in \code{vignette("infer")}.
}
\section{Arbitrary test statistics}{


In addition to the pre-implemented statistics documented in \code{stat}, users can
supply an arbitrary test statistic by supplying a function to the \code{stat}
argument.

The function should have arguments \code{stat(x, order, ...)}, where \code{x} is one
replicate's worth of \code{x}. The \code{order} argument and ellipses will be supplied
directly to the \code{stat} function. Internally, \code{calculate()} will split \code{x} up
into data frames by replicate and pass them one-by-one to the supplied \code{stat}.
For example, to implement \code{stat = "mean"} as a function, one could write:

\if{html}{\out{<div class="sourceCode r">}}\preformatted{stat_mean <- function(x, order, ...) \{mean(x$hours)\}
obs_mean <-
  gss \%>\%
  specify(response = hours) \%>\%
  calculate(stat = stat_mean)

set.seed(1)
null_dist_mean <-
  gss \%>\%
  specify(response = hours) \%>\%
  hypothesize(null = "point", mu = 40) \%>\%
  generate(reps = 5, type = "bootstrap") \%>\%
  calculate(stat = stat_mean)
}\if{html}{\out{</div>}}

Note that the same \code{stat_mean} function is supplied to both \code{generate()}d and
non-\code{generate()}d infer objects--no need to implement support for grouping
by \code{replicate} yourself.
}

\examples{
# calculating the observed mean number of hours worked per week
gss |>
  observe(hours ~ NULL, stat = "mean")

# equivalently, calculating the same statistic with the core verbs
gss |>
  specify(response = hours) |>
  calculate(stat = "mean")

# calculating a t statistic for hypothesized mu = 40 hours worked/week
gss |>
  observe(hours ~ NULL, stat = "t", null = "point", mu = 40)

# equivalently, calculating the same statistic with the core verbs
gss |>
  specify(response = hours) |>
  hypothesize(null = "point", mu = 40) |>
  calculate(stat = "t")

# similarly for a difference in means in age based on whether
# the respondent has a college degree
observe(
  gss,
  age ~ college,
  stat = "diff in means",
  order = c("degree", "no degree")
)

# equivalently, calculating the same statistic with the core verbs
gss |>
  specify(age ~ college) |>
  calculate("diff in means", order = c("degree", "no degree"))

# for a more in-depth explanation of how to use the infer package
\dontrun{
vignette("infer")
}

}
\seealso{
Other wrapper functions: 
\code{\link{chisq_stat}()},
\code{\link{chisq_test}()},
\code{\link{prop_test}()},
\code{\link{t_stat}()},
\code{\link{t_test}()}

Other functions for calculating observed statistics: 
\code{\link{chisq_stat}()},
\code{\link{t_stat}()}
}
\concept{functions for calculating observed statistics}
\concept{wrapper functions}


================================================
FILE: man/pipe.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/pipe.R
\name{\%>\%}
\alias{\%>\%}
\title{Pipe}
\arguments{
\item{lhs, rhs}{Inference functions and the initial data frame.}
}
\description{
Like \{dplyr\}, \{infer\} also uses the pipe (\code{|>}) function
from \code{magrittr} to turn function composition into a series of
iterative statements.
}


================================================
FILE: man/print.infer.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/print_methods.R
\name{print.infer}
\alias{print.infer}
\alias{print.infer_layer}
\alias{print.infer_dist}
\title{Print methods}
\usage{
\method{print}{infer}(x, ...)

\method{print}{infer_layer}(x, ...)

\method{print}{infer_dist}(x, ...)
}
\arguments{
\item{x}{An object of class \code{infer}, i.e. output from \code{\link[=specify]{specify()}} or
\code{\link[=hypothesize]{hypothesize()}}, or of class \code{infer_layer}, i.e. output from
\code{\link[=shade_p_value]{shade_p_value()}} or \code{\link[=shade_confidence_interval]{shade_confidence_interval()}}.}

\item{...}{Arguments passed to methods.}
}
\description{
Print methods
}


================================================
FILE: man/prop_test.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/wrappers.R
\name{prop_test}
\alias{prop_test}
\title{Tidy proportion test}
\usage{
prop_test(
  x,
  formula,
  response = NULL,
  explanatory = NULL,
  p = NULL,
  order = NULL,
  alternative = "two-sided",
  conf_int = TRUE,
  conf_level = 0.95,
  success = NULL,
  correct = NULL,
  z = FALSE,
  ...
)
}
\arguments{
\item{x}{A data frame that can be coerced into a \link[tibble:tibble]{tibble}.}

\item{formula}{A formula with the response variable on the left and the
explanatory on the right. Alternatively, a \code{response} and \code{explanatory}
argument can be supplied.}

\item{response}{The variable name in \code{x} that will serve as the response.
This is an alternative to using the \code{formula} argument.}

\item{explanatory}{The variable name in \code{x} that will serve as the
explanatory variable. This is an alternative to using the formula argument.}

\item{p}{A numeric vector giving the hypothesized null proportion of
success for each group.}

\item{order}{A string vector specifying the order in which the proportions
should be subtracted, where  \code{order = c("first", "second")} means
\code{"first" - "second"}. Ignored for one-sample tests, and optional for two
sample tests.}

\item{alternative}{Character string giving the direction of the alternative
hypothesis. Options are \code{"two-sided"} (default), \code{"greater"}, or \code{"less"}.
Only used when testing the null that a single proportion equals a given
value, or that two proportions are equal; ignored otherwise.}

\item{conf_int}{A logical value for whether to include the confidence
interval or not. \code{TRUE} by default.}

\item{conf_level}{A numeric value between 0 and 1. Default value is 0.95.}

\item{success}{The level of \code{response} that will be considered a success, as
a string. Only used when testing the null that a single
proportion equals a given value, or that two proportions are equal;
ignored otherwise.}

\item{correct}{A logical indicating whether Yates' continuity correction
should be applied where possible. If \code{z = TRUE}, the \code{correct} argument will
be overwritten as \code{FALSE}. Otherwise defaults to \code{correct = TRUE}.}

\item{z}{A logical value for whether to report the statistic as a standard
normal deviate or a Pearson's chi-square statistic. \eqn{z^2}  is distributed
chi-square with 1 degree of freedom, though note that the user will likely
need to turn off Yates' continuity correction by setting \code{correct = FALSE}
to see this connection.}

\item{...}{Additional arguments for \link[stats:prop.test]{prop.test()}.}
}
\description{
A tidier version of \link[stats:prop.test]{prop.test()} for equal or given
proportions.
}
\details{
When testing with an explanatory variable with more than two levels, the
\code{order} argument as used in the package is no longer well-defined. The function
will thus raise a warning and ignore the value if supplied a non-NULL \code{order}
argument.

The columns present in the output depend on the output of both \code{\link[=prop.test]{prop.test()}}
and \code{\link[broom:tidy.htest]{broom::glance.htest()}}. See the latter's documentation for column
definitions; columns have been renamed with the following mapping:
\itemize{
\item \code{chisq_df} = \code{parameter}
\item \code{p_value} = \code{p.value}
\item \code{lower_ci} = \code{conf.low}
\item \code{upper_ci} = \code{conf.high}
}
}
\examples{
# two-sample proportion test for difference in proportions of
# college completion by respondent sex
prop_test(gss,
          college ~ sex,
          order = c("female", "male"))

# one-sample proportion test for hypothesized null
# proportion of college completion of .2
prop_test(gss,
          college ~ NULL,
          p = .2)

# report as a z-statistic rather than chi-square
# and specify the success level of the response
prop_test(gss,
          college ~ NULL,
          success = "degree",
          p = .2,
          z = TRUE)

}
\seealso{
Other wrapper functions: 
\code{\link{chisq_stat}()},
\code{\link{chisq_test}()},
\code{\link{observe}()},
\code{\link{t_stat}()},
\code{\link{t_test}()}
}
\concept{wrapper functions}


================================================
FILE: man/reexports.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/fit.R, R/visualize.R
\docType{import}
\name{reexports}
\alias{reexports}
\alias{fit}
\alias{ggplot_add}
\title{Objects exported from other packages}
\details{
Read more about infer's \link[=fit.infer]{fit} function \link[=fit.infer]{here} or
by running \code{?fit.infer} in your console.
}
\keyword{internal}
\description{
These objects are imported from other packages. Follow the links
below to see their documentation.

\describe{
  \item{generics}{\code{\link[generics]{fit}}}

  \item{ggplot2}{\code{\link[ggplot2:update_ggplot]{ggplot_add}}}
}}


================================================
FILE: man/rep_sample_n.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rep_sample_n.R
\name{rep_sample_n}
\alias{rep_sample_n}
\alias{rep_slice_sample}
\title{Perform repeated sampling}
\usage{
rep_sample_n(tbl, size, replace = FALSE, reps = 1, prob = NULL)

rep_slice_sample(
  .data,
  n = NULL,
  prop = NULL,
  replace = FALSE,
  weight_by = NULL,
  reps = 1
)
}
\arguments{
\item{tbl, .data}{Data frame of population from which to sample.}

\item{size, n, prop}{\code{size} and \code{n} refer to the sample size of each sample.
The \code{size} argument to \code{rep_sample_n()} is required, while in
\code{rep_slice_sample()} sample size defaults to 1 if not specified. \code{prop}, an
argument to \code{rep_slice_sample()}, refers to the proportion of rows to sample
in each sample, and is rounded down in the case that \code{prop * nrow(.data)} is
not an integer. When using \code{rep_slice_sample()}, please only supply one of
\code{n} or \code{prop}.}

\item{replace}{Should samples be taken with replacement?}

\item{reps}{Number of samples to take.}

\item{prob, weight_by}{A vector of sampling weights for each of the rows in
\code{.data}—must have length equal to \code{nrow(.data)}. For \code{weight_by}, this
may also be an unquoted column name in \code{.data}.}
}
\value{
A tibble of size \code{reps * n} rows corresponding to \code{reps}
samples of size \code{n} from \code{.data}, grouped by \code{replicate}.
}
\description{
These functions extend the functionality of \code{\link[dplyr:sample_n]{dplyr::sample_n()}} and
\code{\link[dplyr:slice]{dplyr::slice_sample()}} by allowing for repeated sampling of data.
This operation is especially helpful while creating sampling
distributions—see the examples below!
}
\details{
\code{rep_sample_n()} and \code{rep_slice_sample()} are designed to behave similar to
their dplyr counterparts. As such, they have at least the following
differences:
\itemize{
\item In case \code{replace = FALSE} having \code{size} bigger than number of data rows in
\code{rep_sample_n()} will give an error. In \code{rep_slice_sample()} having such \code{n}
or \code{prop > 1} will give warning and output sample size will be set to number
of rows in data.
}

Note that the \code{\link[dplyr:sample_n]{dplyr::sample_n()}} function  has been superseded by
\code{\link[dplyr:slice]{dplyr::slice_sample()}}.
}
\examples{
library(dplyr)
library(ggplot2)
library(tibble)

# take 1000 samples of size n = 50, without replacement
slices <- gss |>
  rep_slice_sample(n = 50, reps = 1000)

slices

# compute the proportion of respondents with a college
# degree in each replicate
p_hats <- slices |>
  group_by(replicate) |>
  summarize(prop_college = mean(college == "degree"))

# plot sampling distribution
ggplot(p_hats, aes(x = prop_college)) +
  geom_density() +
  labs(
    x = "p_hat", y = "Number of samples",
    title = "Sampling distribution of p_hat"
  )

# sampling with probability weights. Note probabilities are automatically
# renormalized to sum to 1
df <- tibble(
  id = 1:5,
  letter = factor(c("a", "b", "c", "d", "e"))
)

rep_slice_sample(df, n = 2, reps = 5, weight_by = c(.5, .4, .3, .2, .1))

# alternatively, pass an unquoted column name in `.data` as `weight_by`
df <- df |> mutate(wts = c(.5, .4, .3, .2, .1))

rep_slice_sample(df, n = 2, reps = 5, weight_by = wts)
}


================================================
FILE: man/shade_confidence_interval.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/shade_confidence_interval.R
\name{shade_confidence_interval}
\alias{shade_confidence_interval}
\alias{shade_ci}
\title{Add information about confidence interval}
\usage{
shade_confidence_interval(
  endpoints,
  color = "mediumaquamarine",
  fill = "turquoise",
  ...
)

shade_ci(endpoints, color = "mediumaquamarine", fill = "turquoise", ...)
}
\arguments{
\item{endpoints}{The lower and upper bounds of the interval to be plotted.
Likely, this will be the output of \code{\link[=get_confidence_interval]{get_confidence_interval()}}.
For \code{\link[=calculate]{calculate()}}-based workflows, this will be a 2-element vector
or a \verb{1 x 2} data frame containing the lower and upper values to be plotted.
For \code{\link[=fit.infer]{fit()}}-based workflows, a \verb{(p + 1) x 3} data frame
with columns \code{term}, \code{lower_ci}, and \code{upper_ci}, giving the upper and
lower bounds for each regression term. For use in visualizations of
\code{\link[=assume]{assume()}} output, this must be the output of \code{\link[=get_confidence_interval]{get_confidence_interval()}}.}

\item{color}{A character or hex string specifying the color of the
end points as a vertical lines on the plot.}

\item{fill}{A character or hex string specifying the color to shade the
confidence interval. If \code{NULL} then no shading is actually done.}

\item{...}{Other arguments passed along to ggplot2 functions.}
}
\value{
If added to an existing infer visualization, a ggplot2
object displaying the supplied intervals on top of its corresponding
distribution. Otherwise, an \code{infer_layer} list.
}
\description{
\code{shade_confidence_interval()} plots a confidence interval region on top of
\code{\link[=visualize]{visualize()}} output. The output is a ggplot2 layer that can be added with
\code{+}. The function has a shorter alias, \code{shade_ci()}.

Learn more in \code{vignette("infer")}.
}
\examples{
# find the point estimate---mean number of hours worked per week
point_estimate <- gss |>
  specify(response = hours) |>
  calculate(stat = "mean")

# ...and a bootstrap distribution
boot_dist <- gss |>
  # ...we're interested in the number of hours worked per week
  specify(response = hours) |>
  # generating data points
  generate(reps = 1000, type = "bootstrap") |>
  # finding the distribution from the generated data
  calculate(stat = "mean")

# find a confidence interval around the point estimate
ci <- boot_dist |>
  get_confidence_interval(point_estimate = point_estimate,
                          # at the 95\% confidence level
                          level = .95,
                          # using the standard error method
                          type = "se")


# and plot it!
boot_dist |>
  visualize() +
  shade_confidence_interval(ci)

# or just plot the bounds
boot_dist |>
  visualize() +
  shade_confidence_interval(ci, fill = NULL)

# you can shade confidence intervals on top of
# theoretical distributions, too---the theoretical
# distribution will be recentered and rescaled to
# align with the confidence interval
sampling_dist <- gss |>
  specify(response = hours) |>
  assume(distribution = "t")

visualize(sampling_dist) +
  shade_confidence_interval(ci)

\donttest{
# to visualize distributions of coefficients for multiple
# explanatory variables, use a `fit()`-based workflow

# fit 1000 linear models with the `hours` variable permuted
null_fits <- gss |>
 specify(hours ~ age + college) |>
 hypothesize(null = "independence") |>
 generate(reps = 1000, type = "permute") |>
 fit()

null_fits

# fit a linear model to the observed data
obs_fit <- gss |>
  specify(hours ~ age + college) |>
  fit()

obs_fit

# get confidence intervals for each term
conf_ints <-
  get_confidence_interval(
    null_fits,
    point_estimate = obs_fit,
    level = .95
  )

# visualize distributions of coefficients
# generated under the null
visualize(null_fits)

# add a confidence interval shading layer to juxtapose
# the null fits with the observed fit for each term
visualize(null_fits) +
  shade_confidence_interval(conf_ints)
}

# more in-depth explanation of how to use the infer package
\dontrun{
vignette("infer")
}

}
\seealso{
Other visualization functions: 
\code{\link{shade_p_value}()}
}
\concept{visualization functions}


================================================
FILE: man/shade_p_value.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/shade_p_value.R
\name{shade_p_value}
\alias{shade_p_value}
\alias{shade_pvalue}
\title{Shade histogram area beyond an observed statistic}
\usage{
shade_p_value(obs_stat, direction, color = "red2", fill = "pink", ...)

shade_pvalue(obs_stat, direction, color = "red2", fill = "pink", ...)
}
\arguments{
\item{obs_stat}{The observed statistic or estimate. For
\code{\link[=calculate]{calculate()}}-based workflows, this will be a 1-element numeric vector or
a \verb{1 x 1} data frame containing the observed statistic.
For \code{\link[=fit.infer]{fit()}}-based workflows, a \verb{(p + 1) x 2} data frame
with columns \code{term} and \code{estimate} giving the observed estimate for
each term.}

\item{direction}{A string specifying in which direction the shading should
occur. Options are \code{"less"}, \code{"greater"}, or \code{"two-sided"}. Can
also give \code{"left"}, \code{"right"}, \code{"both"}, \code{"two_sided"}, \code{"two sided"},
or \code{"two.sided"}. If \code{NULL}, the function will not shade any area.}

\item{color}{A character or hex string specifying the color of the observed
statistic as a vertical line on the plot.}

\item{fill}{A character or hex string specifying the color to shade the
p-value region. If \code{NULL}, the function will not shade any area.}

\item{...}{Other arguments passed along to ggplot2 functions.
For expert use only.}
}
\value{
If added to an existing infer visualization, a ggplot2
object displaying the supplied statistic on top of its corresponding
distribution. Otherwise, an \code{infer_layer} list.
}
\description{
\code{shade_p_value()} plots a p-value region on top of
\code{\link[=visualize]{visualize()}} output. The output is a ggplot2 layer that can be added with
\code{+}. The function has a shorter alias, \code{shade_pvalue()}.

Learn more in \code{vignette("infer")}.
}
\examples{
# find the point estimate---mean number of hours worked per week
point_estimate <- gss |>
  specify(response = hours) |>
  hypothesize(null = "point", mu = 40) |>
  calculate(stat = "t")

# ...and a null distribution
null_dist <- gss |>
  # ...we're interested in the number of hours worked per week
  specify(response = hours) |>
  # hypothesizing that the mean is 40
  hypothesize(null = "point", mu = 40) |>
  # generating data points for a null distribution
  generate(reps = 1000, type = "bootstrap") |>
  # estimating the null distribution
  calculate(stat = "t")

# shade the p-value of the point estimate
null_dist |>
  visualize() +
  shade_p_value(obs_stat = point_estimate, direction = "two-sided")

# you can shade confidence intervals on top of
# theoretical distributions, too!
null_dist_theory <- gss |>
  specify(response = hours) |>
  assume(distribution = "t")

null_dist_theory |>
  visualize() +
  shade_p_value(obs_stat = point_estimate, direction = "two-sided")

\donttest{
# to visualize distributions of coefficients for multiple
# explanatory variables, use a `fit()`-based workflow

# fit 1000 linear models with the `hours` variable permuted
null_fits <- gss |>
 specify(hours ~ age + college) |>
 hypothesize(null = "independence") |>
 generate(reps = 1000, type = "permute") |>
 fit()

null_fits

# fit a linear model to the observed data
obs_fit <- gss |>
  specify(hours ~ age + college) |>
  fit()

obs_fit

# visualize distributions of coefficients
# generated under the null
visualize(null_fits)

# add a p-value shading layer to juxtapose the null
# fits with the observed fit for each term
visualize(null_fits) +
  shade_p_value(obs_fit, direction = "both")

# the direction argument will be applied
# to the plot for each term
visualize(null_fits) +
  shade_p_value(obs_fit, direction = "left")
}

# more in-depth explanation of how to use the infer package
\dontrun{
vignette("infer")
}

}
\seealso{
Other visualization functions: 
\code{\link{shade_confidence_interval}()}
}
\concept{visualization functions}


================================================
FILE: man/specify.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/specify.R
\name{specify}
\alias{specify}
\title{Specify response and explanatory variables}
\usage{
specify(x, formula, response = NULL, explanatory = NULL, success = NULL)
}
\arguments{
\item{x}{A data frame that can be coerced into a \link[tibble:tibble]{tibble}.}

\item{formula}{A formula with the response variable on the left and the
explanatory on the right. Alternatively, a \code{response} and \code{explanatory}
argument can be supplied.}

\item{response}{The variable name in \code{x} that will serve as the response.
This is an alternative to using the \code{formula} argument.}

\item{explanatory}{The variable name in \code{x} that will serve as the
explanatory variable. This is an alternative to using the formula argument.}

\item{success}{The level of \code{response} that will be considered a success, as
a string. Needed for inference on one proportion, a difference in
proportions, and corresponding z stats.}
}
\value{
A tibble containing the response (and explanatory, if specified)
variable data.
}
\description{
\code{specify()} is used to specify which columns in the supplied data frame are
the relevant response (and, if applicable, explanatory) variables. Note that
character variables are converted to \code{factor}s.

Learn more in \code{vignette("infer")}.
}
\examples{
# specifying for a point estimate on one variable
gss |>
   specify(response = age)

# specify a relationship between variables as a formula...
gss |>
  specify(age ~ partyid)

# ...or with named arguments!
gss |>
  specify(response = age, explanatory = partyid)

# more in-depth explanation of how to use the infer package
\dontrun{
vignette("infer")
}

}
\seealso{
Other core functions: 
\code{\link{calculate}()},
\code{\link{generate}()},
\code{\link{hypothesize}()}
}
\concept{core functions}


================================================
FILE: man/t_stat.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/wrappers.R
\name{t_stat}
\alias{t_stat}
\title{Tidy t-test statistic}
\usage{
t_stat(
  x,
  formula,
  response = NULL,
  explanatory = NULL,
  order = NULL,
  alternative = "two-sided",
  mu = 0,
  conf_int = FALSE,
  conf_level = 0.95,
  ...
)
}
\arguments{
\item{x}{A data frame that can be coerced into a \link[tibble:tibble]{tibble}.}

\item{formula}{A formula with the response variable on the left and the
explanatory on the right. Alternatively, a \code{response} and \code{explanatory}
argument can be supplied.}

\item{response}{The variable name in \code{x} that will serve as the response.
This is an alternative to using the \code{formula} argument.}

\item{explanatory}{The variable name in \code{x} that will serve as the
explanatory variable. This is an alternative to using the formula argument.}

\item{order}{A string vector of specifying the order in which the levels of
the explanatory variable should be ordered for subtraction, where \code{order = c("first", "second")} means \code{("first" - "second")}.}

\item{alternative}{Character string giving the direction of the alternative
hypothesis. Options are \code{"two-sided"} (default), \code{"greater"}, or \code{"less"}.}

\item{mu}{A numeric value giving the hypothesized null mean value for a one
sample test and the hypothesized difference for a two sample test.}

\item{conf_int}{A logical value for whether to include the confidence
interval or not. \code{TRUE} by default.}

\item{conf_level}{A numeric value between 0 and 1. Default value is 0.95.}

\item{...}{Pass in arguments to infer functions.}
}
\description{
A shortcut wrapper function to get the observed test statistic for a t test.
This function has been deprecated in favor of the more general \code{\link[=observe]{observe()}}.
}
\examples{
library(tidyr)

# t test statistic for true mean number of hours worked
# per week of 40
gss |>
   t_stat(response = hours, mu = 40)

# t test statistic for number of hours worked per week
# by college degree status
gss |>
   tidyr::drop_na(college) |>
   t_stat(formula = hours ~ college,
      order = c("degree", "no degree"),
      alternative = "two-sided")

}
\seealso{
Other wrapper functions: 
\code{\link{chisq_stat}()},
\code{\link{chisq_test}()},
\code{\link{observe}()},
\code{\link{prop_test}()},
\code{\link{t_test}()}

Other functions for calculating observed statistics: 
\code{\link{chisq_stat}()},
\code{\link{observe}()}
}
\concept{functions for calculating observed statistics}
\concept{wrapper functions}


================================================
FILE: man/t_test.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/wrappers.R
\name{t_test}
\alias{t_test}
\title{Tidy t-test}
\usage{
t_test(
  x,
  formula,
  response = NULL,
  explanatory = NULL,
  order = NULL,
  alternative = "two-sided",
  mu = 0,
  conf_int = TRUE,
  conf_level = 0.95,
  ...
)
}
\arguments{
\item{x}{A data frame that can be coerced into a \link[tibble:tibble]{tibble}.}

\item{formula}{A formula with the response variable on the left and the
explanatory on the right. Alternatively, a \code{response} and \code{explanatory}
argument can be supplied.}

\item{response}{The variable name in \code{x} that will serve as the response.
This is an alternative to using the \code{formula} argument.}

\item{explanatory}{The variable name in \code{x} that will serve as the
explanatory variable. This is an alternative to using the formula argument.}

\item{order}{A string vector of specifying the order in which the levels of
the explanatory variable should be ordered for subtraction, where \code{order = c("first", "second")} means \code{("first" - "second")}.}

\item{alternative}{Character string giving the direction of the alternative
hypothesis. Options are \code{"two-sided"} (default), \code{"greater"}, or \code{"less"}.}

\item{mu}{A numeric value giving the hypothesized null mean value for a one
sample test and the hypothesized difference for a two sample test.}

\item{conf_int}{A logical value for whether to include the confidence
interval or not. \code{TRUE} by default.}

\item{conf_level}{A numeric value between 0 and 1. Default value is 0.95.}

\item{...}{For passing in other arguments to \link[stats:t.test]{t.test()}.}
}
\description{
A tidier version of \link[stats:t.test]{t.test()} for two sample tests.
}
\examples{
library(tidyr)

# t test for number of hours worked per week
# by college degree status
gss |>
   tidyr::drop_na(college) |>
   t_test(formula = hours ~ college,
      order = c("degree", "no degree"),
      alternative = "two-sided")

# see vignette("infer") for more explanation of the
# intuition behind the infer package, and vignette("t_test")
# for more examples of t-tests using infer

}
\seealso{
Other wrapper functions: 
\code{\link{chisq_stat}()},
\code{\link{chisq_test}()},
\code{\link{observe}()},
\code{\link{prop_test}()},
\code{\link{t_stat}()}
}
\concept{wrapper functions}


================================================
FILE: man/visualize.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/visualize.R
\name{visualize}
\alias{visualize}
\alias{visualise}
\title{Visualize statistical inference}
\usage{
visualize(data, bins = 15, method = "simulation", dens_color = "black", ...)

visualise(data, bins = 15, method = "simulation", dens_color = "black", ...)
}
\arguments{
\item{data}{A distribution. For simulation-based inference, a data frame
containing a distribution of \code{\link[=calculate]{calculate()}}d statistics
or \code{\link[=fit.infer]{fit()}}ted coefficient estimates. This object should
have been passed to \code{\link[=generate]{generate()}} before being supplied or
\code{\link[=calculate]{calculate()}} to \code{\link[=fit.infer]{fit()}}. For theory-based inference,
the output of \code{\link[=assume]{assume()}}.}

\item{bins}{The number of bins in the histogram.}

\item{method}{A string giving the method to display. Options are
\code{"simulation"}, \code{"theoretical"}, or \code{"both"} with \code{"both"} corresponding to
\code{"simulation"} and \code{"theoretical"}. If \code{data} is the output of \code{\link[=assume]{assume()}},
this argument will be ignored and default to \code{"theoretical"}.}

\item{dens_color}{A character or hex string specifying the color of the
theoretical density curve.}

\item{...}{Additional arguments passed along to functions in ggplot2.
For \code{method = "simulation"}, \code{stat_bin()}, and for \code{method = "theoretical"},
\code{geom_path()}. Some values may be overwritten by infer internally.}
}
\value{
For \code{\link[=calculate]{calculate()}}-based workflows, a ggplot showing the simulation-based
distribution as a histogram or bar graph. Can also be used to display
theoretical distributions.

For \code{\link[=assume]{assume()}}-based workflows, a ggplot showing the theoretical distribution.

For \code{\link[=fit.infer]{fit()}}-based workflows, a \code{patchwork} object
showing the simulation-based distributions as a histogram or bar graph.
The interface to adjust plot options and themes is a bit different
for \code{patchwork} plots than ggplot2 plots. The examples highlight the
biggest differences here, but see \code{\link[patchwork:plot_annotation]{patchwork::plot_annotation()}} and
\link[patchwork:plot_arithmetic]{patchwork::&.gg} for more details.
}
\description{
Visualize the distribution of the simulation-based inferential statistics or
the theoretical distribution (or both!).

Learn more in \code{vignette("infer")}.
}
\details{
In order to make the visualization workflow more straightforward
and explicit, \code{visualize()} now only should be used to plot distributions
of statistics directly. A number of arguments related to shading p-values and
confidence intervals are now deprecated in \code{visualize()} and should
now be passed to \code{\link[=shade_p_value]{shade_p_value()}} and \code{\link[=shade_confidence_interval]{shade_confidence_interval()}},
respectively. \code{\link[=visualize]{visualize()}} will raise a warning if deprecated arguments
are supplied.
}
\examples{

# generate a null distribution
null_dist <- gss |>
  # we're interested in the number of hours worked per week
  specify(response = hours) |>
  # hypothesizing that the mean is 40
  hypothesize(null = "point", mu = 40) |>
  # generating data points for a null distribution
  generate(reps = 1000, type = "bootstrap") |>
  # calculating a distribution of means
  calculate(stat = "mean")

# or a bootstrap distribution, omitting the hypothesize() step,
# for use in confidence intervals
boot_dist <- gss |>
  specify(response = hours) |>
  generate(reps = 1000, type = "bootstrap") |>
  calculate(stat = "mean")

# we can easily plot the null distribution by piping into visualize
null_dist |>
  visualize()

# we can add layers to the plot as in ggplot, as well...
# find the point estimate---mean number of hours worked per week
point_estimate <- gss |>
  specify(response = hours) |>
  calculate(stat = "mean")

# find a confidence interval around the point estimate
ci <- boot_dist |>
  get_confidence_interval(point_estimate = point_estimate,
                          # at the 95\% confidence level
                          level = .95,
                          # using the standard error method
                          type = "se")

# display a shading of the area beyond the p-value on the plot
null_dist |>
  visualize() +
  shade_p_value(obs_stat = point_estimate, direction = "two-sided")

# ...or within the bounds of the confidence interval
null_dist |>
  visualize() +
  shade_confidence_interval(ci)

# plot a theoretical sampling distribution by creating
# a theory-based distribution with `assume()`
sampling_dist <- gss |>
  specify(response = hours) |>
  assume(distribution = "t")

visualize(sampling_dist)

# you can shade confidence intervals on top of
# theoretical distributions, too---the theoretical
# distribution will be recentered and rescaled to
# align with the confidence interval
visualize(sampling_dist) +
  shade_confidence_interval(ci)


# to plot both a theory-based and simulation-based null distribution,
# use a theorized statistic (i.e. one of t, z, F, or Chisq)
# and supply the simulation-based null distribution
null_dist_t <- gss |>
  specify(response = hours) |>
  hypothesize(null = "point", mu = 40) |>
  generate(reps = 1000, type = "bootstrap") |>
  calculate(stat = "t")

obs_stat <- gss |>
  specify(response = hours) |>
  hypothesize(null = "point", mu = 40) |>
  calculate(stat = "t")

visualize(null_dist_t, method = "both")

visualize(null_dist_t, method = "both") +
  shade_p_value(obs_stat, "both")

\donttest{
# to visualize distributions of coefficients for multiple
# explanatory variables, use a `fit()`-based workflow

# fit 1000 models with the `hours` variable permuted
null_fits <- gss |>
 specify(hours ~ age + college) |>
 hypothesize(null = "independence") |>
 generate(reps = 1000, type = "permute") |>
 fit()

null_fits

# visualize distributions of resulting coefficients
visualize(null_fits)

# the interface to add themes and other elements to patchwork
# plots (outputted by `visualize` when the inputted data
# is from the `fit()` function) is a bit different than adding
# them to ggplot2 plots.
library(ggplot2)

# to add a ggplot2 theme to a `calculate()`-based visualization, use `+`
null_dist |> visualize() + theme_dark()

# to add a ggplot2 theme to a `fit()`-based visualization, use `&`
null_fits |> visualize() & theme_dark()
}

# More in-depth explanation of how to use the infer package
\dontrun{
vignette("infer")
}

}
\seealso{
\code{\link[=shade_p_value]{shade_p_value()}}, \code{\link[=shade_confidence_interval]{shade_confidence_interval()}}.
}


================================================
FILE: man-roxygen/seeds.Rmd
================================================
# Reproducibility

When using the infer package for research, or in other cases when exact reproducibility is a priority, be sure the set the seed for R's random number generator. infer will respect the random seed specified in the `set.seed()` function, returning the same result when `generate()`ing data given an identical seed. For instance, we can calculate the difference in mean `age` by `college` degree status using the `gss` dataset from 10 versions of the `gss` resampled with permutation using the following code.

```{r, include = FALSE}
library(infer)
```

```{r}
set.seed(1)

gss |>
  specify(age ~ college) |>
  hypothesize(null = "independence") |>
  generate(reps = 5, type = "permute") |>
  calculate("diff in means", order = c("degree", "no degree"))
```

Setting the seed to the same value again and rerunning the same code will produce the same result.

```{r}
# set the seed
set.seed(1)

gss |>
  specify(age ~ college) |>
  hypothesize(null = "independence") |>
  generate(reps = 5, type = "permute") |>
  calculate("diff in means", order = c("degree", "no degree"))
```

Please keep this in mind when writing infer code that utilizes resampling with `generate()`.


================================================
FILE: tests/testthat/_snaps/aliases.md
================================================
# old aliases produce informative error

    Code
      res <- p_value(gss_calc, obs_stat = -0.2, direction = "right")
    Condition
      Error:
      ! `conf_int()` was deprecated in infer 0.4.0 and is now defunct.
      i Please use `get_p_value()` instead.

---

    Code
      res_ <- conf_int(gss_permute)
    Condition
      Error:
      ! `conf_int()` was deprecated in infer 0.4.0 and is now defunct.
      i Please use `get_confidence_interval()` instead.


================================================
FILE: tests/testthat/_snaps/assume.md
================================================
# assume errors with bad arguments

    Code
      assume(hypothesize(specify(gss, age ~ college), null = "independence"), "boop",
      nrow(gss) - 1)
    Condition
      Error in `assume()`:
      ! The distribution argument must be one of "Chisq", "F", "t", or "z".

---

    Code
      assume(hypothesize(specify(gss, age ~ college), null = "independence"), "t", c(
        nrow(gss) - 1, 2))
    Condition
      Error in `assume()`:
      ! A T distribution requires 1 degrees of freedom argument, but 2 were supplied.

---

    Code
      assume(hypothesize(specify(gss, age ~ partyid), null = "independence"), "F",
      nrow(gss) - 1)
    Condition
      Error in `assume()`:
      ! An F distribution requires 2 degrees of freedom arguments, but 1 was supplied.

---

    Code
      assume(hypothesize(specify(gss, age ~ partyid), null = "independence"), "F",
      "boop")
    Condition
      Error in `assume()`:
      ! `assume()` expects the `df` argument to be a numeric vector, but you supplied a character object.

---

    Code
      assume(hypothesize(specify(gss, age ~ partyid), null = "independence"), "F",
      nrow(gss) - 1, 1)
    Condition
      Error in `assume()`:
      ! `assume()` ignores the dots `...` argument, though the argument were supplied.
      i Did you forget to concatenate the `df` argument with `c()`?

---

    Code
      assume(hypothesize(specify(gss, age ~ partyid), null = "independence"), "F",
      nrow(gss) - 1, 1, 2)
    Condition
      Error in `assume()`:
      ! `assume()` ignores the dots `...` argument, though the arguments were supplied.
      i Did you forget to concatenate the `df` argument with `c()`?

---

    Code
      assume(hypothesize(specify(gss, age ~ finrela), null = "independence"), "t",
      nrow(gss) - 1)
    Condition
      Error in `assume()`:
      ! The supplied distribution "t" is not well-defined for a numeric response variable (age) and a multinomial categorical explanatory variable (finrela).

---

    Code
      assume(hypothesize(specify(gss, age ~ finrela), null = "independence"), "z",
      nrow(gss) - 1)
    Condition
      Error in `assume()`:
      ! The supplied distribution "z" is not well-defined for a numeric response variable (age) and a multinomial categorical explanatory variable (finrela).

---

    Code
      assume(hypothesize(specify(gss, age ~ NULL), null = "point", mu = 40), "z",
      nrow(gss) - 1)
    Condition
      Error in `assume()`:
      ! The supplied distribution "z" is not well-defined for a numeric response variable (age) and no explanatory variable.

---

    Code
      assume(gss, "z", nrow(gss) - 1)
    Condition
      Error in `assume()`:
      ! The `x` argument must be the output of a core infer function, likely `specify()` or `hypothesize()`.

---

    Code
      assume("boop", "z", nrow(gss) - 1)
    Condition
      Error in `assume()`:
      ! The `x` argument must be the output of a core infer function, likely `specify()` or `hypothesize()`.

# assume() handles automatic df gracefully

    Code
      res_ <- assume(hypothesize(specify(gss, response = hours), null = "point", mu = 40),
      "t", nrow(gss) - 2)
    Message
      Message: The supplied `df` argument does not match its expected value. If this is unexpected, ensure that your calculation for `df` is correct (see `assume()` (`?infer::assume()`) for recognized values) or supply `df = NULL` to `assume()`.


================================================
FILE: tests/testthat/_snaps/calculate.md
================================================
# x is a tibble

    Code
      calculate(vec, stat = "mean")
    Condition
      Error in `calculate()`:
      ! `x` must be 'tibble', not 'integer'.

# calculate checks `stat` argument

    Code
      calculate(gss_tbl, stat = 3)
    Condition
      Error in `calculate()`:
      ! `stat` must be 'string', not 'double'.

---

    Code
      calculate(gen_gss_slope, stat = "slopee")
    Condition
      Error in `check_calculate_stat()`:
      ! `stat` must be one of "mean", "median", "sum", "sd", "prop", "count", "diff in means", "diff in medians", "diff in props", "Chisq", "F", "slope", "correlation", "t", "z", "ratio of props", "ratio of means", or "odds ratio", not "slopee".
      i Did you mean "slope"?

---

    Code
      calculate(gen_gss_slope, stat = "stdev")
    Condition
      Error in `check_calculate_stat()`:
      ! `stat` must be one of "mean", "median", "sum", "sd", "prop", "count", "diff in means", "diff in medians", "diff in props", "Chisq", "F", "slope", "correlation", "t", "z", "ratio of props", "ratio of means", or "odds ratio", not "stdev".

---

    Code
      calculate(gen_gss_slope, stat = "stat")
    Condition
      Error in `check_calculate_stat()`:
      ! `stat` must be one of "mean", "median", "sum", "sd", "prop", "count", "diff in means", "diff in medians", "diff in props", "Chisq", "F", "slope", "correlation", "t", "z", "ratio of props", "ratio of means", or "odds ratio", not "stat".

---

    Code
      calculate(gen_gss_slope, stat = "chi sq")
    Condition
      Error in `check_calculate_stat()`:
      ! `stat` must be one of "mean", "median", "sum", "sd", "prop", "count", "diff in means", "diff in medians", "diff in props", "Chisq", "F", "slope", "correlation", "t", "z", "ratio of props", "ratio of means", or "odds ratio", not "chi sq".
      i Did you mean "Chisq"?

# errors informatively with incompatible stat vs hypothesis

    Code
      calculate(hypothesise(specify(gss, college ~ sex, success = "degree"), null = "point",
      p = 0.4), stat = "diff in props", order = c("female", "male"))
    Condition
      Error in `calculate()`:
      ! The supplied statistic `stat = "diff in props"` is incompatible with the supplied hypothesis `null = "point"`.

---

    Code
      calculate(generate(hypothesise(specify(gss, college ~ sex, success = "degree"),
      null = "point", p = 0.4), reps = 10, type = "draw"), stat = "diff in props",
      order = c("female", "male"))
    Condition
      Error in `calculate()`:
      ! The supplied statistic `stat = "diff in props"` is incompatible with the supplied hypothesis `null = "point"`.

# response attribute has been set

    Code
      calculate(tibble::as_tibble(gss), stat = "median")
    Condition
      Error in `dplyr::filter()`:
      i In argument: `resp == response_type & exp == explanatory_type`.
      Caused by error:
      ! `..1` must be of size 10 or 1, not size 0.

# variable chosen is of appropriate class (one var problems)

    Code
      calculate(gen_gss1, stat = "mean")
    Condition
      Error in `calculate()`:
      ! A mean is not well-defined for a multinomial categorical response variable (partyid) and no explanatory variable.

---

    Code
      calculate(gen_gss_num, stat = "prop")
    Condition
      Error in `calculate()`:
      ! A proportion is not well-defined for a numeric response variable (hours) and no explanatory variable.

---

    Code
      calculate(gen_gss_num, stat = "median")
    Condition
      Error in `calculate()`:
      ! `"mu"` does not correspond to `stat = "median"`.

---

    Code
      calculate(gen_gss_num, stat = "sd")
    Condition
      Error in `calculate()`:
      ! `"mu"` does not correspond to `stat = "sd"`.

---

    Code
      calculate(gen_gss_num2, stat = "prop")
    Condition
      Error in `calculate()`:
      ! A proportion is not well-defined for a numeric response variable (hours) and no explanatory variable.

---

    Code
      calculate(gen_gss_num2, stat = "mean")
    Condition
      Error in `calculate()`:
      ! `stat == "mean"` requires `"mu"` to be set in `hypothesize()`.

---

    Code
      calculate(gen_gss_num2, stat = "sd")
    Condition
      Error in `calculate()`:
      ! `"med"` does not correspond to `stat = "sd"`.

---

    Code
      calculate(gen_gss_num3, stat = "prop")
    Condition
      Error in `calculate()`:
      ! A proportion is not well-defined for a numeric response variable (hours) and no explanatory variable.

---

    Code
      calculate(gen_gss_num3, stat = "mean")
    Condition
      Error in `calculate()`:
      ! `stat == "mean"` requires `"mu"` to be set in `hypothesize()`.

---

    Code
      calculate(gen_gss_num3, stat = "median")
    Condition
      Error in `calculate()`:
      ! `stat == "median"` requires `"med"` to be set in `hypothesize()`.

# grouping (explanatory) variable is a factor (two var problems)

    Code
      calculate(gen_gss2, stat = "diff in means")
    Condition
      Error in `calculate()`:
      ! A difference in means is not well-defined for a numeric response variable (hours) and a numeric explanatory variable (age).

---

    Code
      calculate(gen_gss2, stat = "diff in medians")
    Condition
      Error in `calculate()`:
      ! A difference in medians is not well-defined for a numeric response variable (hours) and a numeric explanatory variable (age).

# grouping (explanatory) variable is numeric (two var problems)

    Code
      calculate(gen_gss2a, stat = "slope")
    Condition
      Error in `calculate()`:
      ! The infer team has not implemented test statistics for the supplied variable types.

---

    Code
      calculate(gen_gss2a, stat = "t")
    Condition
      Error in `calculate()`:
      ! The infer team has not implemented test statistics for the supplied variable types.

---

    Code
      calculate(gen_gss2a, stat = "diff in medians")
    Condition
      Error in `calculate()`:
      ! The infer team has not implemented test statistics for the supplied variable types.

# response variable is a factor (two var problems)

    Code
      calculate(gen_gss3, stat = "Chisq")
    Condition
      Error in `calculate()`:
      ! A chi-square statistic is not well-defined for a numeric response variable (hours) and a multinomial categorical explanatory variable (partyid).

---

    Code
      calculate(gen_gss4, stat = "diff in props")
    Condition
      Error in `calculate()`:
      ! A difference in proportions is not well-defined for a dichotomous categorical response variable (sex) and a multinomial categorical explanatory variable (partyid).

---

    Code
      calculate(gen_gss4, stat = "ratio of props")
    Condition
      Error in `calculate()`:
      ! A ratio of proportions is not well-defined for a dichotomous categorical response variable (sex) and a multinomial categorical explanatory variable (partyid).

---

    Code
      calculate(gen_gss4, stat = "odds ratio")
    Condition
      Error in `calculate()`:
      ! An odds ratio is not well-defined for a dichotomous categorical response variable (sex) and a multinomial categorical explanatory variable (partyid).

---

    Code
      calculate(gen_gss4, stat = "t")
    Condition
      Error in `calculate()`:
      ! A t statistic is not well-defined for a dichotomous categorical response variable (sex) and a multinomial categorical explanatory variable (partyid).

---

    Code
      res_ <- calculate(gen_gss4a, stat = "z")
    Condition
      Warning:
      The statistic is based on a difference or ratio; by default, for difference-based statistics, the explanatory variable is subtracted in the order "male" - "female", or divided in the order "male" / "female" for ratio-based statistics. To specify this order yourself, supply `order = c("male", "female")` to the calculate() function.

# response variable is numeric (two var problems)

    Code
      calculate(gen_gss5, stat = "F")
    Condition
      Error in `calculate()`:
      ! The infer team has not implemented test statistics for the supplied variable types.

# two sample mean-type problems are working

    Code
      res_ <- calculate(gen_gss5a, stat = "diff in means")
    Condition
      Warning:
      The statistic is based on a difference or ratio; by default, for difference-based statistics, the explanatory variable is subtracted in the order "no degree" - "degree", or divided in the order "no degree" / "degree" for ratio-based statistics. To specify this order yourself, supply `order = c("no degree", "degree")` to the calculate() function.

---

    Code
      res_ <- calculate(gen_gss5a, stat = "t")
    Condition
      Warning:
      The statistic is based on a difference or ratio; by default, for difference-based statistics, the explanatory variable is subtracted in the order "no degree" - "degree", or divided in the order "no degree" / "degree" for ratio-based statistics. To specify this order yourself, supply `order = c("no degree", "degree")` to the calculate() function.

# properties of tibble passed-in are correct

    Code
      calculate(gen_gss6)
    Condition
      Error in `calculate()`:
      ! `stat` must be 'string', not 'character'.

# chi-square matches chisq.test value

    Code
      calculate(specify(dat, action ~ sex, success = "promote"), stat = "Chisq",
      order = c("male", "female"), correct = "boop")
    Condition
      Error in `dplyr::summarise()`:
      i In argument: `stat = chisq_indep(data)`.
      i In row 1.
      Caused by error in `correct && nrow(x) == 2L`:
      ! invalid 'x' type in 'x && y'

# chi-square works with factors with unused levels

    Code
      out <- pull(calculate(specify(test_tbl, y ~ x), stat = "Chisq"))

---

    Code
      out <- pull(calculate(specify(test_tbl, y ~ x), stat = "Chisq"))

# `order` is working

    Code
      calculate(gen_gss_tbl10, stat = "diff in means", order = c(TRUE, FALSE))
    Condition
      Error in `calculate()`:
      ! TRUE is not a level of the explanatory variable.

---

    Code
      calculate(gen_gss_tbl11, stat = "diff in medians", order = "no degree")
    Condition
      Error in `calculate()`:
      ! Only one level specified in `order`. Both levels need to be specified.

---

    Code
      calculate(gen_gss_tbl11, stat = "diff in medians", order = c(NA, "no degree"))
    Condition
      Error in `calculate()`:
      ! Only one level specified in `order`. Both levels need to be specified.

---

    Code
      calculate(gen_gss_tbl11, stat = "diff in medians", order = c("no degree",
        "other"))
    Condition
      Error in `calculate()`:
      ! other is not a level of the explanatory variable.

---

    Code
      calculate(gen_gss_tbl11, stat = "diff in means", order = c("no degree",
        "degree", "the last one"))
    Condition
      Error in `calculate()`:
      ! `order` is expecting only two entries.

---

    Code
      res_ <- calculate(gen_gss_tbl11, stat = "diff in means")
    Condition
      Warning:
      The statistic is based on a difference or ratio; by default, for difference-based statistics, the explanatory variable is subtracted in the order "no degree" - "degree", or divided in the order "no degree" / "degree" for ratio-based statistics. To specify this order yourself, supply `order = c("no degree", "degree")` to the calculate() function.

# NULL response gives error

    Code
      calculate(gss_tbl_improp, stat = "mean")
    Condition
      Error in `dplyr::filter()`:
      i In argument: `resp == response_type & exp == explanatory_type`.
      Caused by error:
      ! `..1` must be of size 10 or 1, not size 0.

# order being given when not needed gives warning

    Code
      res_ <- calculate(gen_gss_tbl15, stat = "Chisq", order = c("dem", "ind"))
    Condition
      Warning:
      Statistic is not based on a difference or ratio; the `order` argument will be ignored. Check `calculate()` (`?infer::calculate()`) for details.

# specify() |> calculate() works

    Code
      res_ <- calculate(hypothesize(specify(gss_tbl, hours ~ NULL), null = "point",
      mu = 4), stat = "mean")
    Message
      Message: The point null hypothesis `mu = 4` does not inform calculation of the observed statistic (a mean) and will be ignored.

---

    Code
      res_ <- calculate(specify(gss_tbl, partyid ~ NULL), stat = "Chisq")
    Condition
      Warning:
      A chi-square statistic requires a null hypothesis to calculate the observed statistic.
      Output assumes the following null values: `p = c(dem = 0.333333333333333, ind = 0.333333333333333, rep = 0.333333333333333)`.

# One sample t hypothesis test is working

    Code
      res_ <- calculate(generate(hypothesize(specify(gss_tbl, hours ~ NULL), null = "point",
      mu = 1), reps = 10), stat = "t")

---

    Code
      res_ <- calculate(specify(gss_tbl, response = hours), stat = "t")
    Condition
      Warning:
      A t statistic requires a null hypothesis to calculate the observed statistic.
      Output assumes the following null value: `mu = 0`.

# specify done before calculate

    Code
      calculate(gss_tbl_mean, stat = "mean")
    Condition
      Error in `dplyr::filter()`:
      i In argument: `resp == response_type & exp == explanatory_type`.
      Caused by error:
      ! `..1` must be of size 10 or 1, not size 0.

---

    Code
      calculate(gss_tbl_prop, stat = "prop")
    Condition
      Error in `dplyr::filter()`:
      i In argument: `resp == response_type & exp == explanatory_type`.
      Caused by error:
      ! `..1` must be of size 10 or 1, not size 0.

---

    Code
      calculate(gss_tbl_prop, stat = "count")
    Condition
      Error in `dplyr::filter()`:
      i In argument: `resp == response_type & exp == explanatory_type`.
      Caused by error:
      ! `..1` must be of size 10 or 1, not size 0.

# chisq GoF has params specified for observed stat

    Code
      res_ <- calculate(no_params, stat = "Chisq")
    Condition
      Warning:
      A chi-square statistic requires a null hypothesis to calculate the observed statistic.
      Output assumes the following null values: `p = c(dem = 0.333333333333333, ind = 0.333333333333333, rep = 0.333333333333333)`.

# One sample t bootstrap is working

    Code
      res_ <- calculate(generate(specify(gss_tbl, hours ~ NULL), reps = 10, type = "bootstrap"),
      stat = "t")
    Condition
      Warning:
      A t statistic requires a null hypothesis to calculate the observed statistic.
      Output assumes the following null value: `mu = 0`.

# calculate warns informatively with insufficient null

    Code
      res_ <- calculate(specify(gss, response = sex, success = "female"), stat = "z")
    Condition
      Warning:
      A z statistic requires a null hypothesis to calculate the observed statistic.
      Output assumes the following null value: `p = .5`.

---

    Code
      res_ <- calculate(specify(gss, hours ~ NULL), stat = "t")
    Condition
      Warning:
      A t statistic requires a null hypothesis to calculate the observed statistic.
      Output assumes the following null value: `mu = 0`.

---

    Code
      res_ <- calculate(specify(gss, response = partyid), stat = "Chisq")
    Condition
      Warning:
      A chi-square statistic requires a null hypothesis to calculate the observed statistic.
      Output assumes the following null values: `p = c(dem = 0.2, ind = 0.2, rep = 0.2, other = 0.2, DK = 0.2)`.

# calculate messages informatively with excessive null

    Code
      res_ <- calculate(hypothesize(specify(gss, hours ~ NULL), null = "point", mu = 40),
      stat = "mean")
    Message
      Message: The point null hypothesis `mu = 40` does not inform calculation of the observed statistic (a mean) and will be ignored.

---

    Code
      res_ <- calculate(hypothesize(specify(gss, hours ~ NULL), null = "point",
      sigma = 10), stat = "sd")
    Message
      Message: The point null hypothesis `sigma = 10` does not inform calculation of the observed statistic (a standard deviation) and will be ignored.

---

    Code
      res_ <- calculate(hypothesize(specify(gss, hours ~ college), null = "independence"),
      "diff in means", order = c("no degree", "degree"))
    Message
      Message: The independence null hypothesis does not inform calculation of the observed statistic (a difference in means) and will be ignored.

# calculate errors out with multiple explanatory variables

    Code
      calculate(hypothesize(specify(gss, hours ~ age + college), null = "independence"),
      stat = "t")
    Condition
      Error in `calculate()`:
      ! Multiple explanatory variables are not supported in `calculate()`.
      i When working with multiple explanatory variables, use `fit()` (`?infer::fit.infer()`) instead.

---

    Code
      calculate(generate(hypothesize(specify(gss, hours ~ age + college), null = "independence"),
      reps = 3, type = "permute"), stat = "t")
    Condition
      Error in `calculate()`:
      ! Multiple explanatory variables are not supported in `calculate()`.
      i When working with multiple explanatory variables, use `fit()` (`?infer::fit.infer()`) instead.

# arbitrary test statistic works

    Code
      calculate(specify(gss, response = hours), stat = function(x, ...) {
        mean(x$hour)
      })
    Condition
      Error in `calculate()`:
      ! The supplied `stat` function encountered an issue.
      Caused by warning:
      ! Unknown or uninitialised column: `hour`.

---

    Code
      calculate(specify(gss, response = hours), stat = function(x, ...) {
        mean("hey there")
      })
    Condition
      Error in `calculate()`:
      ! The supplied `stat` function encountered an issue.
      Caused by warning in `mean.default()`:
      ! argument is not numeric or logical: returning NA

---

    Code
      calculate(specify(gss, response = hours), stat = function(x, ...) {
        data.frame(woops = mean(x$hours))
      })
    Condition
      Error in `calculate()`:
      ! The supplied `stat` function must return a scalar value.
      i It returned a data frame.

---

    Code
      calculate(specify(gss, response = hours), stat = function(x, ...) {
        identity
      })
    Condition
      Error in `calculate()`:
      ! The supplied `stat` function must return a scalar value.
      i It returned a function.


================================================
FILE: tests/testthat/_snaps/fit.md
================================================
# fit.infer messages informatively on excessive null

    Code
      res_ <- fit(hypothesize(specify(gss, hours ~ age + college), null = "independence"))
    Message
      Message: The independence null hypothesis does not inform calculation of the observed fit and will be ignored.

# fit.infer logistic regression works

    Code
      fit(specify(gss, finrela ~ age + college))
    Condition
      Error in `fit()`:
      ! infer does not support fitting models for categorical response variables with more than two levels.
      i Please see `multinom_reg()` from the parsnip package.


================================================
FILE: tests/testthat/_snaps/generate.md
================================================
# cohesion with type argument

    Code
      res_ <- generate(hyp_prop, type = "bootstrap")
    Condition
      Warning:
      You have given `type = "bootstrap"`, but `type` is expected to be `"draw"`. This workflow is untested and the results may not mean what you think they mean.

---

    Code
      res_ <- generate(hyp_chisq_gof, type = "bootstrap")
    Condition
      Warning:
      You have given `type = "bootstrap"`, but `type` is expected to be `"draw"`. This workflow is untested and the results may not mean what you think they mean.

---

    Code
      res_ <- generate(hyp_diff_in_props, type = "draw")
    Condition
      Warning:
      You have given `type = "draw"`, but `type` is expected to be `"permute"`. This workflow is untested and the results may not mean what you think they mean.

---

    Code
      res_ <- generate(hyp_chisq_ind, type = "draw")
    Condition
      Warning:
      You have given `type = "draw"`, but `type` is expected to be `"permute"`. This workflow is untested and the results may not mean what you think they mean.

---

    Code
      res_ <- generate(hyp_mean, type = "draw")
    Condition
      Warning:
      You have given `type = "draw"`, but `type` is expected to be `"bootstrap"`. This workflow is untested and the results may not mean what you think they mean.
      Error in `sample.int()`:
      ! NA in probability vector

---

    Code
      res_ <- generate(hyp_diff_in_means, type = "draw")
    Condition
      Warning:
      You have given `type = "draw"`, but `type` is expected to be `"permute"`. This workflow is untested and the results may not mean what you think they mean.

---

    Code
      res_ <- generate(hyp_anova, type = "draw")
    Condition
      Warning:
      You have given `type = "draw"`, but `type` is expected to be `"permute"`. This workflow is untested and the results may not mean what you think they mean.

---

    Code
      res_ <- generate(hyp_prop, type = "permute")
    Condition
      Warning:
      You have given `type = "permute"`, but `type` is expected to be `"draw"`. This workflow is untested and the results may not mean what you think they mean.
      Error in `generate()`:
      ! Please `specify()` an explanatory and a response variable when permuting.

---

    Code
      res_ <- generate(hyp_chisq_gof, type = "permute")
    Condition
      Warning:
      You have given `type = "permute"`, but `type` is expected to be `"draw"`. This workflow is untested and the results may not mean what you think they mean.
      Error in `generate()`:
      ! Please `specify()` an explanatory and a response variable when permuting.

---

    Code
      res_ <- generate(hyp_mean, type = "permute")
    Condition
      Warning:
      You have given `type = "permute"`, but `type` is expected to be `"bootstrap"`. This workflow is untested and the results may not mean what you think they mean.
      Error in `generate()`:
      ! Please `specify()` an explanatory and a response variable when permuting.

# sensible output

    Code
      generate(hyp_mean, reps = 1, type = "other")
    Condition
      Error in `generate()`:
      ! The `type` argument should be one of "bootstrap", "permute", or "draw". See `generate()` (`?infer::generate()`) for more details.

# auto `type` works (generate)

    Code
      generate(hypothesize(specify(mtcars_df, response = mpg), null = "point", mu = 25),
      reps = 100, type = "permute")
    Condition
      Warning:
      You have given `type = "permute"`, but `type` is expected to be `"bootstrap"`. This workflow is untested and the results may not mean what you think they mean.
      Error in `generate()`:
      ! Please `specify()` an explanatory and a response variable when permuting.

---

    Code
      res_ <- generate(specify(mtcars_df, response = mpg), reps = 100, type = "draw")
    Condition
      Warning:
      You have given `type = "draw"`, but `type` is expected to be `"bootstrap"`. This workflow is untested and the results may not mean what you think they mean.

---

    Code
      res_ <- generate(hypothesize(specify(mtcars_df, response = mpg), null = "point",
      med = 26), reps = 100, type = "permute")
    Condition
      Warning:
      You have given `type = "permute"`, but `type` is expected to be `"bootstrap"`. This workflow is untested and the results may not mean what you think they mean.
      Error in `generate()`:
      ! Please `specify()` an explanatory and a response variable when permuting.

---

    Code
      res_ <- generate(hypothesize(specify(mtcars_df, response = am, success = "1"),
      null = "point", p = 0.25), reps = 100, type = "bootstrap")
    Condition
      Warning:
      You have given `type = "bootstrap"`, but `type` is expected to be `"draw"`. This workflow is untested and the results may not mean what you think they mean.

---

    Code
      res_ <- generate(hypothesize(specify(mtcars_df, cyl ~ NULL), null = "point", p = c(
        `4` = 0.5, `6` = 0.25, `8` = 0.25)), reps = 100, type = "bootstrap")
    Condition
      Warning:
      You have given `type = "bootstrap"`, but `type` is expected to be `"draw"`. This workflow is untested and the results may not mean what you think they mean.

---

    Code
      res_ <- generate(hypothesize(specify(mtcars_df, cyl ~ am), null = "independence"),
      reps = 100, type = "draw")
    Condition
      Warning:
      You have given `type = "draw"`, but `type` is expected to be `"permute"`. This workflow is untested and the results may not mean what you think they mean.

---

    Code
      res_ <- generate(hypothesize(specify(mtcars_df, mpg ~ cyl), null = "independence"),
      reps = 100, type = "draw")
    Condition
      Warning:
      You have given `type = "draw"`, but `type` is expected to be `"permute"`. This workflow is untested and the results may not mean what you think they mean.

---

    Code
      res_ <- generate(specify(mtcars_df, response = am, success = "1"), reps = 100,
      type = "draw")
    Condition
      Warning:
      You have given `type = "draw"`, but `type` is expected to be `"bootstrap"`. This workflow is untested and the results may not mean what you think they mean.

---

    Code
      res_ <- generate(specify(mtcars_df, mpg ~ am), reps = 100, type = "permute")
    Condition
      Warning:
      You have given `type = "permute"`, but `type` is expected to be `"bootstrap"`. This workflow is untested and the results may not mean what you think they mean.
      Error in `generate()`:
      ! Permuting should be done only when doing an independence hypothesis test. See `hypothesize()` (`?infer::hypothesize()`).

---

    Code
      res_ <- generate(specify(mtcars_df, am ~ vs, success = "1"), reps = 100, type = "draw")
    Condition
      Warning:
      You have given `type = "draw"`, but `type` is expected to be `"bootstrap"`. This workflow is untested and the results may not mean what you think they mean.

---

    Code
      res_ <- generate(specify(mtcars_df, mpg ~ hp), reps = 100, type = "draw")
    Condition
      Warning:
      You have given `type = "draw"`, but `type` is expected to be `"bootstrap"`. This workflow is untested and the results may not mean what you think they mean.

# mismatches lead to error

    Code
      res_ <- generate(mtcars_df, reps = 10, type = "permute")
    Condition
      Error in `generate()`:
      ! The `variables` argument should be one or more unquoted variable names (not strings in quotation marks).

---

    Code
      res_ <- generate(hypothesize(specify(mtcars_df, am ~ NULL, success = "1"),
      null = "independence", p = c(`1` = 0.5)), reps = 100, type = "draw")
    Condition
      Error in `hypothesize()`:
      ! Please `specify()` an explanatory and a response variable when testing a null hypothesis of `"independence"`.

---

    Code
      res_ <- generate(hypothesize(specify(mtcars_df, cyl ~ NULL), null = "point", p = c(
        `4` = 0.5, `6` = 0.25, `8` = 0.25)), reps = 100, type = "bootstrap")
    Condition
      Warning:
      You have given `type = "bootstrap"`, but `type` is expected to be `"draw"`. This workflow is untested and the results may not mean what you think they mean.

---

    Code
      res_ <- generate(specify(mtcars_df, mpg ~ hp), reps = 100, type = "other")
    Condition
      Error in `generate()`:
      ! The `type` argument should be one of "bootstrap", "permute", or "draw". See `generate()` (`?infer::generate()`) for more details.

# generate() handles `NULL` value of `type`

    Code
      res_ <- generate(hyp_prop, type = NULL)
    Message
      Setting `type = "draw"` in `generate()`.

# variables argument prompts when it ought to

    Code
      res_ <- generate(hypothesize(specify(gss[1:10, ], hours ~ age + college), null = "independence"),
      reps = 2, type = "permute", variables = c(howdy))
    Condition
      Error in `generate()`:
      ! The column howdy provided to the `variables` argument is not in the supplied data.

---

    Code
      res <- generate(hypothesize(specify(gss[1:10, ], hours ~ age + college), null = "independence"),
      reps = 2, type = "permute", variables = c(howdy, doo))
    Condition
      Error in `generate()`:
      ! The columns howdy and doo provided to the `variables` argument are not in the supplied data.

---

    Code
      res_ <- generate(hypothesize(specify(gss[1:10, ], hours ~ NULL), null = "point",
      mu = 40), reps = 2, type = "bootstrap", variables = c(hours))
    Condition
      Warning:
      The `variables` argument is only relevant for the "permute" generation type and will be ignored.

---

    Code
      res_ <- generate(hypothesize(specify(gss[1:10, ], hours ~ age + college), null = "independence"),
      reps = 2, type = "permute", variables = "hours")
    Condition
      Error in `generate()`:
      ! The `variables` argument should be one or more unquoted variable names (not strings in quotation marks).

---

    Code
      res_ <- generate(hypothesize(specify(gss[1:10, ], hours ~ age + college + age *
        college), null = "independence"), reps = 2, type = "permute", variables = age *
        college)
    Message
      Message: Please supply only data columns to the `variables` argument. Note that any derived effects that depend on these columns will also be affected.

---

    Code
      res_ <- generate(hypothesize(specify(gss[1:10, ], hours ~ age + college + age *
        college), null = "independence"), reps = 2, type = "permute", variables = c(
        hours, age * college))
    Message
      Message: Please supply only data columns to the `variables` argument. Note that any derived effects that depend on these columns will also be affected.

---

    Code
      res_ <- generate(specify(gss[1:10, ], hours ~ age * college), reps = 2, type = "bootstrap",
      variables = c(hours, age * college))
    Condition
      Warning:
      The `variables` argument is only relevant for the "permute" generation type and will be ignored.

# type = 'draw'/'simulate' superseding handled gracefully

    Code
      res_ <- generate(hypothesize(specify(mtcars_df, response = am, success = "1"),
      null = "point", p = 0.5), type = "simulate")
    Message
      The `"simulate"` generation type has been renamed to `"draw"`. Use `type = "draw"` instead to quiet this message.

---

    Code
      res_ <- generate(hypothesize(specify(mtcars_df, response = am, success = "1"),
      null = "point", p = 0.5), type = "boop")
    Condition
      Error in `generate()`:
      ! The `type` argument should be one of "bootstrap", "permute", or "draw". See `generate()` (`?infer::generate()`) for more details.

---

    Code
      generate(hypothesize(specify(mtcars_df, response = mpg), null = "point", mu = 20),
      type = "draw")
    Condition
      Warning:
      You have given `type = "draw"`, but `type` is expected to be `"bootstrap"`. This workflow is untested and the results may not mean what you think they mean.
      Error in `sample.int()`:
      ! NA in probability vector

---

    Code
      generate(hypothesize(specify(mtcars_df, response = mpg), null = "point", mu = 20),
      type = "draw")
    Condition
      Warning:
      You have given `type = "draw"`, but `type` is expected to be `"bootstrap"`. This workflow is untested and the results may not mean what you think they mean.
      Error in `sample.int()`:
      ! NA in probability vector


================================================
FILE: tests/testthat/_snaps/get_confidence_interval.md
================================================
# get_confidence_interval messages with no explicit `level`

    Code
      res_ <- get_confidence_interval(test_df)
    Message
      Using `level = 0.95` to compute confidence interval.

# get_confidence_interval checks input

    Code
      get_confidence_interval(test_df, type = "other")
    Condition
      Error in `get_confidence_interval()`:
      ! The options for `type` are "percentile", "se", or "bias-corrected".

---

    Code
      get_confidence_interval(test_df, level = 1.2)
    Condition
      Error in `get_confidence_interval()`:
      ! The value of `level` must be between 0 and 1, non-inclusive.

---

    Code
      get_confidence_interval(test_df, point_estimate = "a")
    Condition
      Error in `get_confidence_interval()`:
      ! `point_estimate` must be 'numeric', not 'character'.

---

    Code
      get_confidence_interval(test_df, type = "se", point_estimate = "a")
    Condition
      Error in `get_confidence_interval()`:
      ! `point_estimate` must be 'numeric', not 'character'.

---

    Code
      get_confidence_interval(test_df, type = "se", point_estimate = data.frame(p = "a"))
    Condition
      Error in `get_confidence_interval()`:
      ! `point_estimate[[1]][[1]]` must be 'numeric', not 'character'.

---

    Code
      get_confidence_interval(test_df, type = "se")
    Condition
      Error in `get_confidence_interval()`:
      ! A numeric value needs to be given for `point_estimate` for `type` "se" or "bias-corrected".

---

    Code
      get_confidence_interval(test_df, type = "bias-corrected")
    Condition
      Error in `get_confidence_interval()`:
      ! A numeric value needs to be given for `point_estimate` for `type` "se" or "bias-corrected".

# get_confidence_interval can handle fitted objects

    Code
      get_confidence_interval(null_fits, point_estimate = obs_fit_2, level = 0.95)
    Condition
      Error in `get_confidence_interval()`:
      ! The explanatory variables used to generate the distribution of null fits are not the same used to fit the observed data.

---

    Code
      get_confidence_interval(null_fits, point_estimate = obs_fit_3, level = 0.95)
    Condition
      Error in `get_confidence_interval()`:
      ! The response variable of the null fits (hours) is not the same as that of the observed fit (year).

# get_confidence_interval can handle bad args with fitted objects

    Code
      get_confidence_interval(null_fits, point_estimate = "boop", level = 0.95)
    Condition
      Error in `get_confidence_interval()`:
      ! The `point_estimate` argument should be the output of `fit()`.
      i See the documentation with `?get_confidence_interval`.

---

    Code
      get_confidence_interval(null_fits, point_estimate = obs_fit$estimate, level = 0.95)
    Condition
      Error in `get_confidence_interval()`:
      ! The `point_estimate` argument should be the output of `fit()`.
      i See the documentation with `?get_confidence_interval`.

---

    Code
      get_confidence_interval(obs_fit, point_estimate = null_fits, level = 0.95)
    Condition
      Error in `get_confidence_interval()`:
      ! The `x` argument needs to be passed to `generate()` before `fit()`.

# theoretical CIs check arguments properly

    Code
      get_confidence_interval(null_dist_theory, level = 0.95, type = "percentile",
        point_estimate = x_bar)
    Condition
      Error in `get_confidence_interval()`:
      ! The only `type` option for theory-based confidence intervals is `type = "se"`.

---

    Code
      get_confidence_interval(null_dist_theory, level = 0.95, type = "boop",
        point_estimate = x_bar)
    Condition
      Error in `get_confidence_interval()`:
      ! The only `type` option for theory-based confidence intervals is `type = "se"`.

---

    Code
      get_confidence_interval(null_dist_theory, level = 0.95, point_estimate = dplyr::pull(
        x_bar))
    Condition
      Error in `get_confidence_interval()`:
      ! For theoretical confidence intervals, the `point_estimate` argument must be an `infer` object. Have you made sure to supply the output of `calculate()` as the `point_estimate` argument?

---

    Code
      get_confidence_interval(null_dist_theory, level = 0.95, point_estimate = x_bar$
        stat)
    Condition
      Error in `get_confidence_interval()`:
      ! For theoretical confidence intervals, the `point_estimate` argument must be an `infer` object. Have you made sure to supply the output of `calculate()` as the `point_estimate` argument?

---

    Code
      get_confidence_interval(null_dist_theory, level = 0.95, point_estimate = obs_t)
    Condition
      Error in `get_confidence_interval()`:
      ! The only allowable statistics for theoretical confidence intervals are "mean", "prop", "diff in means", and "diff in props". See the "Details" section of `get_confidence_interval()` (`?infer::get_confidence_interval()`) for more details.

---

    Code
      get_confidence_interval(null_dist_theory, level = 0.95, point_estimate = p_hat)
    Condition
      Error in `get_confidence_interval()`:
      ! Confidence intervals using a `t` distribution for `stat = prop` are not implemented.

---

    Code
      get_confidence_interval(null_dist_z, level = 0.95, point_estimate = x_bar)
    Condition
      Error in `get_confidence_interval()`:
      ! Confidence intervals using a `z` distribution for `stat = mean` are not implemented.

# handles missing values gracefully (#520)

    Code
      res <- get_confidence_interval(boot_dist, 0.95)
    Condition
      Warning:
      4 estimates were missing and were removed when calculating the confidence interval.


================================================
FILE: tests/testthat/_snaps/get_p_value.md
================================================
# direction is appropriate

    Code
      get_p_value(test_df, obs_stat = 0.5, direction = "righ")
    Condition
      Error in `get_p_value()`:
      ! The provided value for `direction` is not appropriate. Possible values are "less", "greater", "two-sided", "left", "right", "both", "two_sided", "two sided", or "two.sided".

# theoretical p-value not supported error

    Code
      get_p_value(calculate(hypothesize(specify(gss_tbl, hours ~ partyid), null = "independence"),
      stat = "F"), obs_stat = obs_F, direction = "right")
    Condition
      Error in `get_p_value()`:
      ! Theoretical p-values are not yet supported.
      i `x` should be the result of calling `generate()`.

# get_p_value warns in case of zero p-value

    Code
      res_ <- get_p_value(gss_calc, obs_stat = -10, direction = "left")
    Condition
      Warning:
      Please be cautious in reporting a p-value of 0. This result is an approximation based on the number of `reps` chosen in the `generate()` step.
      i See `get_p_value()` (`?infer::get_p_value()`) for more information.

# get_p_value throws error in case of `NaN` stat

    Code
      res_ <- get_p_value(gss_calc, 0, "both")
    Condition
      Error:
      ! 1 calculated statistic was `NaN`. Simulation-based p-values are not well-defined for null distributions with non-finite values.
      i See `calculate()` (`?infer::calculate()`) for more details.

---

    Code
      res_ <- get_p_value(gss_calc, 0, "both")
    Condition
      Error:
      ! 2 calculated statistics were `NaN`. Simulation-based p-values are not well-defined for null distributions with non-finite values.
      i See `calculate()` (`?infer::calculate()`) for more details.

---

    Code
      res_ <- get_p_value(gss_calc, 0, "both")
    Condition
      Error:
      ! All calculated statistics were `NaN`.
      i See `calculate()` (`?infer::calculate()`) for more details.

# get_p_value can handle fitted objects

    Code
      get_p_value(null_fits, obs_fit_2, "both")
    Condition
      Error in `get_p_value()`:
      ! The explanatory variables used to generate the distribution of null fits are not the same used to fit the observed data.

---

    Code
      get_p_value(null_fits, obs_fit_3, "both")
    Condition
      Error in `get_p_value()`:
      ! The response variable of the null fits (hours) is not the same as that of the observed fit (year).

# get_p_value can handle bad args with fitted objects

    Code
      get_p_value(null_fits, "boop", "both")
    Condition
      Error in `get_p_value()`:
      ! The `obs_stat` argument should be the output of `fit()`.
      i See the documentation with `?get_p_value`.

---

    Code
      get_p_value(null_fits, obs_fit$estimate, "both")
    Condition
      Error in `get_p_value()`:
      ! The `obs_stat` argument should be the output of `fit()`.
      i See the documentation with `?get_p_value`.

---

    Code
      get_p_value(obs_fit, null_fits, "both")
    Condition
      Error in `get_p_value()`:
      ! The `x` argument needs to be passed to `generate()` before `fit()`.

# get_p_value errors informatively when args are switched

    Code
      get_p_value(obs_stat, null_dist, "both")
    Condition
      Error in `get_p_value()`:
      ! It seems like the `obs_stat` argument has been passed to `get_p_value()` as the first argument when `get_p_value()` expects `x`, a distribution of statistics or coefficient estimates, as the first argument.
      i Have you mistakenly switched the order of `obs_stat` and `x`?

# get_p_value can handle theoretical distributions

    Code
      old_way <- chisq_test(gss, college ~ finrela)
    Condition
      Warning in `stats::chisq.test()`:
      Chi-squared approximation may be incorrect

# get_p_value warns with bad theoretical distributions

    Code
      res_ <- get_p_value(t_dist_30, t_obs, direction = "both")
    Condition
      Warning:
      `x` and `obs_stat` were generated using different null hypotheses. This workflow is untested and results may not mean what you think they mean.


================================================
FILE: tests/testthat/_snaps/hypothesize.md
================================================
# hypothesize() throws an error when null is not point or independence

    Code
      hypothesize(specify(mtcars_df, response = mpg), null = "dependence")
    Condition
      Error in `hypothesize()`:
      ! `null` should be either "point", "independence", or "paired independence".

# hypothesize() throws an error when multiple null values are provided

    Code
      hypothesize(specify(mtcars_df, response = mpg), null = c("point",
        "independence"))
    Condition
      Error in `hypothesize()`:
      ! You should specify exactly one type of null hypothesis.

# hypothesize() throws an error when multiple params are set

    Code
      hypothesize(specify(mtcars_df, response = mpg), null = "point", mu = 25, med = 20)
    Condition
      Error in `hypothesize()`:
      ! You must specify exactly one of `p`, `mu`, `med`, or `sigma`.

# hypothesize() throws a warning when params are set with independence

    Code
      res_ <- hypothesize(specify(mtcars_df, mpg ~ vs), null = "independence", mu = 25)
    Condition
      Warning:
      Parameter values should not be specified when testing that two variables are independent.

# hypothesize() throws a warning when params are set with paired independence

    Code
      res_ <- hypothesize(specify(mtcars_df, response = mpg), null = "paired independence",
      mu = 25)
    Condition
      Warning:
      Parameter values should not be specified when testing paired independence.

# hypothesize() throws an error when p is greater than 1

    Code
      res_ <- hypothesize(specify(mtcars_df, response = vs, success = "1"), null = "point",
      p = 1 + .Machine$double.eps)
    Condition
      Error in `hypothesize()`:
      ! `p` should only contain values between zero and one.

# hypothesize() throws an error when p is less than 0

    Code
      res_ <- hypothesize(specify(mtcars_df, response = vs, success = "1"), null = "point",
      p = -.Machine$double.neg.eps)
    Condition
      Error in `hypothesize()`:
      ! `p` should only contain values between zero and one.

# hypothesize() throws an error when p contains missing values

    Code
      res_ <- hypothesize(specify(mtcars_df, response = vs, success = "1"), null = "point",
      p = c(`0` = 0.5, `1` = NA_real_))
    Condition
      Error in `hypothesize()`:
      ! `p` should not contain missing values.

# hypothesize() throws an error when vector p does not sum to 1

    Code
      res_ <- hypothesize(specify(mtcars_df, response = vs, success = "1"), null = "point",
      p = c(`0` = 0.5, `1` = 0.5 + (eps * 2)))
    Condition
      Error in `hypothesize()`:
      ! Make sure the hypothesized values for the `p` parameters sum to 1. Please try again.

# hypothesize arguments function

    Code
      res_ <- hypothesize(matrix1)
    Condition
      Error in `hypothesize()`:
      ! `null` should be either "point", "independence", or "paired independence".

---

    Code
      res_ <- hypothesize(mtcars_s, null = NA)
    Condition
      Error in `hypothesize()`:
      ! `null` should be either "point", "independence", or "paired independence".

---

    Code
      res_ <- hypothesize(mtcars_s)
    Condition
      Error in `hypothesize()`:
      ! `null` should be either "point", "independence", or "paired independence".

---

    Code
      res_ <- hypothesize(mtcars_s, null = "point", mean = 3)
    Condition
      Error in `hypothesize()`:
      ! unused argument (mean = 3)

---

    Code
      res_ <- hypothesize(mtcars_s, null = "independence")
    Condition
      Error in `hypothesize()`:
      ! Please `specify()` an explanatory and a response variable when testing a null hypothesis of `"independence"`.

---

    Code
      res_ <- hypothesize(mtcars_s, null = "point")
    Condition
      Error in `hypothesize()`:
      ! You must specify exactly one of `p`, `mu`, `med`, or `sigma`.

---

    Code
      res_ <- hypothesize(specify(mtcars_f, mpg ~ am), null = "paired independence")
    Condition
      Error in `hypothesize()`:
      ! Please `specify()` only a response variable when testing a null hypothesis of `"paired independence"`.
      i The supplied response variable should be the pre-computed difference between paired observations.

---

    Code
      res <- hypothesize(mtcars_s, null = c("point", "independence"), mu = 3)
    Condition
      Error in `hypothesize()`:
      ! You should specify exactly one type of null hypothesis.

---

    Code
      res_ <- hypothesize(dplyr::select(mtcars_df, vs), null = "point", mu = 1)
    Condition
      Error in `.subset2()`:
      ! attempt to select less than one element in get1index

---

    Code
      res_ <- hypothesize(specify(mtcars_df, response = vs), null = "point", mu = 1)
    Condition
      Error in `specify()`:
      ! A level of the response variable `vs` needs to be specified for the `success` argument in `specify()`.

---

    Code
      res_ <- hypothesize(mtcars_s, null = "point", p = 0.2)
    Condition
      Error in `hypothesize()`:
      ! A point null regarding a proportion requires that `success` be indicated in `specify()`.

---

    Code
      res_ <- hypothesize(mtcars_s)
    Condition
      Error in `hypothesize()`:
      ! `null` should be either "point", "independence", or "paired independence".

# params correct

    Code
      res_ <- hypothesize(one_prop_specify, null = "point", mu = 2)
    Condition
      Error in `hypothesize()`:
      ! Testing one categorical variable requires `p` to be used as a parameter.

---

    Code
      res_ <- hypothesize(one_mean_specify, null = "point", mean = 0.5)
    Condition
      Error in `hypothesize()`:
      ! unused argument (mean = 0.5)

# user can specify multiple explanatory variables

    Code
      res_ <- hypothesize(specify(gss, hours ~ sex + college), null = "independence",
      mu = 40)
    Condition
      Warning:
      Parameter values should not be specified when testing that two variables are independent.


================================================
FILE: tests/testthat/_snaps/observe.md
================================================
# observe() output is the same as the old wrappers

    Code
      res_wrap <- chisq_stat(gss_tbl, college ~ partyid)
    Condition
      Warning:
      `chisq_stat()` was deprecated in infer 1.0.0.
      i Please use `observe()` instead.

---

    Code
      res_wrap_2 <- t_stat(gss_tbl, hours ~ sex, order = c("male", "female"))
    Condition
      Warning:
      `t_stat()` was deprecated in infer 1.0.0.
      i Please use `observe()` instead.


================================================
FILE: tests/testthat/_snaps/print.md
================================================
# print method fits linewidth with many predictors (#543)

    Code
      specify(mtcars, mpg ~ cyl + disp + hp + drat + wt + qsec)
    Output
      Response: mpg (numeric)
      Explanatory: cyl (numeric), disp (numeric), hp (numer...
      # A tibble: 32 x 7
           mpg   cyl  disp    hp  drat    wt  qsec
         <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
       1  21       6  160    110  3.9   2.62  16.5
       2  21       6  160    110  3.9   2.88  17.0
       3  22.8     4  108     93  3.85  2.32  18.6
       4  21.4     6  258    110  3.08  3.22  19.4
       5  18.7     8  360    175  3.15  3.44  17.0
       6  18.1     6  225    105  2.76  3.46  20.2
       7  14.3     8  360    245  3.21  3.57  15.8
       8  24.4     4  147.    62  3.69  3.19  20  
       9  22.8     4  141.    95  3.92  3.15  22.9
      10  19.2     6  168.   123  3.92  3.44  18.3
      # i 22 more rows


================================================
FILE: tests/testthat/_snaps/rep_sample_n.md
================================================
# `rep_sample_n` checks input

    Code
      rep_sample_n("a", size = 1)
    Condition
      Error in `rep_sample_n()`:
      ! `tbl` must be 'data.frame', not 'character'.

---

    Code
      rep_sample_n(population, size = "a")
    Condition
      Error in `rep_sample_n()`:
      ! `size` must be 'single non-negative number', not 'character'.

---

    Code
      rep_sample_n(population, size = 1:2)
    Condition
      Error in `rep_sample_n()`:
      ! `size` must be 'single non-negative number', not 'integer'.

---

    Code
      rep_sample_n(population, size = -1)
    Condition
      Error in `rep_sample_n()`:
      ! `size` must be 'single non-negative number', not 'double'.

---

    Code
      rep_sample_n(population, size = 1, replace = "a")
    Condition
      Error in `rep_sample_n()`:
      ! `replace` must be 'TRUE or FALSE', not 'character'.

---

    Code
      rep_sample_n(population, size = 1, reps = "a")
    Condition
      Error in `rep_sample_n()`:
      ! `reps` must be 'single number not less than 1', not 'character'.

---

    Code
      rep_sample_n(population, size = 1, reps = 1:2)
    Condition
      Error in `rep_sample_n()`:
      ! `reps` must be 'single number not less than 1', not 'integer'.

---

    Code
      rep_sample_n(population, size = 1, reps = 0.5)
    Condition
      Error in `rep_sample_n()`:
      ! `reps` must be 'single number not less than 1', not 'double'.

---

    Code
      rep_sample_n(population, size = 1, prob = "a")
    Condition
      Error in `rep_sample_n()`:
      ! `prob` must be 'numeric vector with length `nrow(tbl)` = 5', not 'character'.

---

    Code
      rep_sample_n(population, size = 1, prob = c(0.1, 0.9))
    Condition
      Error in `rep_sample_n()`:
      ! `prob` must be 'numeric vector with length `nrow(tbl)` = 5', not 'double'.

# `rep_sample_n` gives error on big sample size if `replace=FALSE`

    Code
      rep_sample_n(population, size = n_population * 2)
    Condition
      Error in `rep_sample_n()`:
      ! Asked sample size (10) is bigger than number of rows in data (5) while `replace` is FALSE. Use `replace = TRUE`.

# `rep_slice_sample` checks input

    Code
      rep_slice_sample("a", n = 1)
    Condition
      Error in `rep_slice_sample()`:
      ! `.data` must be 'data.frame', not 'character'.

---

    Code
      rep_slice_sample(population, n = "a")
    Condition
      Error in `rep_slice_sample()`:
      ! `n` must be 'single non-negative number', not 'character'.

---

    Code
      rep_slice_sample(population, n = 1:2)
    Condition
      Error in `rep_slice_sample()`:
      ! `n` must be 'single non-negative number', not 'integer'.

---

    Code
      rep_slice_sample(population, n = -1)
    Condition
      Error in `rep_slice_sample()`:
      ! `n` must be 'single non-negative number', not 'double'.

---

    Code
      rep_slice_sample(population, prop = "a")
    Condition
      Error in `rep_slice_sample()`:
      ! `prop` must be 'single non-negative number', not 'character'.

---

    Code
      rep_slice_sample(population, prop = 1:2)
    Condition
      Error in `rep_slice_sample()`:
      ! `prop` must be 'single non-negative number', not 'integer'.

---

    Code
      rep_slice_sample(population, prop = -1)
    Condition
      Error in `rep_slice_sample()`:
      ! `prop` must be 'single non-negative number', not 'double'.

---

    Code
      rep_slice_sample(population, n = 1, prop = 0.5)
    Condition
      Error in `rep_slice_sample()`:
      ! Please supply exactly one of the `n` or `prop` arguments.

---

    Code
      rep_slice_sample(population, n = 1, replace = "a")
    Condition
      Error in `rep_slice_sample()`:
      ! `replace` must be 'TRUE or FALSE', not 'character'.

---

    Code
      rep_slice_sample(population, n = 1, weight_by = "a")
    Condition
      Error in `rep_slice_sample()`:
      ! `weight_by` must be 'a numeric vector with length `nrow(.data)` = 5 or an unquoted column name', not 'character'.

---

    Code
      rep_slice_sample(population, n = 1, weight_by = c(0.1, 0.9))
    Condition
      Error in `rep_slice_sample()`:
      ! `weight_by` must be 'a numeric vector with length `nrow(.data)` = 5 or an unquoted column name', not 'double'.

---

    Code
      rep_slice_sample(population, n = 1, weight_by = wts)
    Condition
      Error in `rep_slice_sample()`:
      ! The column wts provided to the `weight_by` argument is not in the supplied data.

---

    Code
      rep_slice_sample(population, n = 1, reps = "a")
    Condition
      Error in `rep_slice_sample()`:
      ! `reps` must be 'single number not less than 1', not 'character'.

---

    Code
      rep_slice_sample(population, n = 1, reps = 1:2)
    Condition
      Error in `rep_slice_sample()`:
      ! `reps` must be 'single number not less than 1', not 'integer'.

---

    Code
      rep_slice_sample(population, n = 1, reps = 0.5)
    Condition
      Error in `rep_slice_sample()`:
      ! `reps` must be 'single number not less than 1', not 'double'.

# `rep_slice_sample` warns on big sample size if `replace = FALSE`

    Code
      out <- rep_slice_sample(population, n = n_population * 2, reps = 1)
    Condition
      Warning:
      Asked sample size (10) is bigger than number of rows in data (5) while `replace` is FALSE. Using number of rows as sample size.

---

    Code
      out <- rep_slice_sample(population, prop = 2, reps = 1)
    Condition
      Warning:
      Asked sample size (10) is bigger than number of rows in data (5) while `replace` is FALSE. Using number of rows as sample size.


================================================
FILE: tests/testthat/_snaps/shade_confidence_interval.md
================================================
# shade_confidence_interval throws errors and warnings

    Code
      res_ <- gss_viz_sim + shade_confidence_interval(c(1, 2, 3))
    Condition
      Warning:
      Expecting `endpoints` to be a 1 x 2 data frame or 2 element vector. Using the first two entries as the `endpoints`.

---

    Code
      res_ <- gss_viz_sim + shade_confidence_interval(data.frame(x = 1))
    Condition
      Error in `shade_confidence_interval()`:
      ! Expecting `endpoints` to be a 1 x 2 data frame or 2 element vector.

---

    Code
      res_ <- gss_viz_sim + shade_confidence_interval(c(-1, 1), color = "x")
    Condition
      Error in `shade_confidence_interval_term()`:
      ! `color` must be 'color string', not 'character'.

---

    Code
      res_ <- gss_viz_sim + shade_confidence_interval(c(-1, 1), fill = "x")
    Condition
      Error in `shade_confidence_interval_term()`:
      ! `fill` must be 'color string', not 'character'.

---

    Code
      res_ <- shade_confidence_interval(gss_viz_sim, c(-1, 1))
    Condition
      Error in `shade_confidence_interval()`:
      ! It looks like you piped the result of `visualize()` into `shade_confidence_interval()` rather than adding the result of `shade_confidence_interval()` as a layer with `+`.
      i Consider changing `|>` (or `%>%`) to `+`.

---

    Code
      res_ <- shade_confidence_interval(gss_viz_sim, endpoints = c(-1, 1))
    Condition
      Error in `shade_confidence_interval()`:
      ! It looks like you piped the result of `visualize()` into `shade_confidence_interval()` rather than adding the result of `shade_confidence_interval()` as a layer with `+`.
      i Consider changing `|>` (or `%>%`) to `+`.

---

    Code
      res_ <- shade_ci(gss_viz_sim, c(-1, 1))
    Condition
      Error in `shade_ci()`:
      ! It looks like you piped the result of `visualize()` into `shade_ci()` rather than adding the result of `shade_ci()` as a layer with `+`.
      i Consider changing `|>` (or `%>%`) to `+`.

---

    Code
      res_ <- shade_ci(gss_viz_sim, endpoints = c(-1, 1))
    Condition
      Error in `shade_ci()`:
      ! It looks like you piped the result of `visualize()` into `shade_ci()` rather than adding the result of `shade_ci()` as a layer with `+`.
      i Consider changing `|>` (or `%>%`) to `+`.


================================================
FILE: tests/testthat/_snaps/shade_p_value.md
================================================
# shade_p_value throws errors

    Code
      gss_viz_sim + shade_p_value("a", "right")
    Condition
      Error in `shade_p_value()`:
      ! `obs_stat` must be 'numeric', not 'character'.

---

    Code
      gss_viz_sim + shade_p_value(1, 1)
    Condition
      Error in `shade_p_value()`:
      ! `direction` must be 'character', not 'double'.

---

    Code
      gss_viz_sim + shade_p_value(1, "right", color = "x")
    Condition
      Error in `shade_p_value()`:
      ! `color` must be 'color string', not 'character'.

---

    Code
      gss_viz_sim + shade_p_value(1, "right", fill = "x")
    Condition
      Error in `shade_p_value()`:
      ! `fill` must be 'color string', not 'character'.

---

    Code
      shade_p_value(gss_viz_sim, 1, "right")
    Condition
      Error in `shade_p_value()`:
      ! It looks like you piped the result of `visualize()` into `shade_p_value()` rather than adding the result of `shade_p_value()` as a layer with `+`.
      i Consider changing `|>` (or `%>%`) to `+`.

---

    Code
      shade_p_value(gss_viz_sim, obs_stat = 1)
    Condition
      Error in `shade_p_value()`:
      ! It looks like you piped the result of `visualize()` into `shade_p_value()` rather than adding the result of `shade_p_value()` as a layer with `+`.
      i Consider changing `|>` (or `%>%`) to `+`.

---

    Code
      shade_p_value(gss_viz_sim, obs_stat = 1, direction = "right")
    Condition
      Error in `shade_p_value()`:
      ! It looks like you piped the result of `visualize()` into `shade_p_value()` rather than adding the result of `shade_p_value()` as a layer with `+`.
      i Consider changing `|>` (or `%>%`) to `+`.

---

    Code
      shade_pvalue(gss_viz_sim, 1, "right")
    Condition
      Error in `shade_pvalue()`:
      ! It looks like you piped the result of `visualize()` into `shade_pvalue()` rather than adding the result of `shade_pvalue()` as a layer with `+`.
      i Consider changing `|>` (or `%>%`) to `+`.

---

    Code
      shade_pvalue(gss_viz_sim, obs_stat = 1)
    Condition
      Error in `shade_pvalue()`:
      ! It looks like you piped the result of `visualize()` into `shade_pvalue()` rather than adding the result of `shade_pvalue()` as a layer with `+`.
      i Consider changing `|>` (or `%>%`) to `+`.

---

    Code
      shade_pvalue(gss_viz_sim, obs_stat = 1, direction = "right")
    Condition
      Error in `shade_pvalue()`:
      ! It looks like you piped the result of `visualize()` into `shade_pvalue()` rather than adding the result of `shade_pvalue()` as a layer with `+`.
      i Consider changing `|>` (or `%>%`) to `+`.


================================================
FILE: tests/testthat/_snaps/specify.md
================================================
# data argument

    Code
      specify(blah ~ cyl)
    Condition
      Error in `specify()`:
      ! `x` must be 'data.frame', not 'language'.

---

    Code
      specify(1:3)
    Condition
      Error in `specify()`:
      ! `x` must be 'data.frame', not 'integer'.

---

    Code
      specify(mtcars_df, mtcars_df$mpg)
    Condition
      Error in `specify()`:
      ! The first unnamed argument must be a formula.
      i You passed in 'double'.
      x Did you forget to name one or more arguments?

# response and explanatory arguments

    Code
      specify(mtcars_df, response = blah)
    Condition
      Error in `specify()`:
      ! The response variable `blah` cannot be found in this dataframe.

---

    Code
      specify(mtcars_df, response = "blah")
    Condition
      Error in `specify()`:
      ! The response should be a bare variable name (not a string in quotation marks).

---

    Code
      specify(mtcars_df, formula = mpg ~ blah)
    Condition
      Error in `specify()`:
      ! The explanatory variable `blah` cannot be found in this dataframe.

---

    Code
      specify(mtcars_df, blah2 ~ cyl)
    Condition
      Error in `specify()`:
      ! The response variable `blah2` cannot be found in this dataframe.

---

    Code
      specify(mtcars_df)
    Condition
      Error in `specify()`:
      ! Please supply a response variable that is not `NULL`.

---

    Code
      specify(mtcars_df, formula = mpg ~ mpg)
    Condition
      Error in `specify()`:
      ! The response and explanatory variables must be different from one another.

---

    Code
      specify(mtcars_df, formula = "mpg" ~ cyl)
    Condition
      Error in `specify()`:
      ! The response should be a bare variable name (not a string in quotation marks).

---

    Code
      specify(mtcars_df, formula = mpg ~ "cyl")
    Condition
      Error in `specify()`:
      ! The explanatory should be a bare variable name (not a string in quotation marks).

---

    Code
      specify(mtcars_df, formula = NULL ~ cyl)
    Condition
      Error in `specify()`:
      ! Please supply a response variable that is not `NULL`.

# success argument

    Code
      specify(mtcars_df, response = vs, success = 1)
    Condition
      Error in `specify()`:
      ! `success` must be a string.

---

    Code
      specify(mtcars_df, response = vs, success = "bogus")
    Condition
      Error in `specify()`:
      ! bogus is not a valid level of vs.

---

    Code
      specify(mtcars_df, response = mpg, success = "1")
    Condition
      Error in `specify()`:
      ! `success` should only be specified if the response is a categorical variable.

---

    Code
      specify(mtcars_df, response = cyl, success = "4")
    Condition
      Error in `specify()`:
      ! `success` can only be used if the response has two levels. `filter()` can reduce a variable to two levels.

---

    Code
      specify(mtcars_df, response = am)
    Condition
      Error in `specify()`:
      ! A level of the response variable `am` needs to be specified for the `success` argument in `specify()`.

# formula argument is a formula

    Code
      specify(mtcars_df, formula = "vs", success = 1)
    Condition
      Error in `specify()`:
      ! The first unnamed argument must be a formula.
      i You passed in 'character'.
      x Did you forget to name one or more arguments?

---

    Code
      specify(mtcars, am, success = "1")
    Condition
      Error in `specify()`:
      ! The argument you passed in for the formula does not exist.
      i Were you trying to pass in an unquoted column name?
      i Did you forget to name one or more arguments?

---

    Code
      specify(mtcars, response = am, "1")
    Condition
      Error in `specify()`:
      ! The first unnamed argument must be a formula.
      i You passed in 'character'.
      x Did you forget to name one or more arguments?

# is_complete works

    Code
      res_ <- specify(some_missing, response = vec)
    Condition
      Warning:
      Removed 1 rows containing missing values.

# specify messages when dropping unused levels

    Code
      res_ <- specify(dplyr::filter(gss, partyid %in% c("rep", "dem")), age ~ partyid)
    Message
      Dropping unused factor levels c("ind", "other", "DK") from the supplied explanatory variable 'partyid'.

---

    Code
      res_ <- specify(dplyr::filter(gss, partyid %in% c("rep", "dem")), partyid ~ age)
    Message
      Dropping unused factor levels c("ind", "other", "DK") from the supplied response variable 'partyid'.

---

    Code
      res_ <- specify(dplyr::filter(gss, partyid %in% c("rep", "dem")), partyid ~
      NULL)
    Message
      Dropping unused factor levels c("ind", "other", "DK") from the supplied response variable 'partyid'.


================================================
FILE: tests/testthat/_snaps/utils.md
================================================
# check_type works

    Code
      check_type(x_var, is.character)
    Condition
      Error:
      ! `x_var` must be 'character', not 'integer'.

---

    Code
      check_type(x_var, is.character, "symbolic")
    Condition
      Error:
      ! `x_var` must be 'symbolic', not 'integer'.

---

    Code
      check_type(x_df, is.logical)
    Condition
      Error:
      ! `x_df` must be 'logical', not 'data.frame'.

# check_type allows custom name for `x`

    Code
      check_type(input, is.numeric, x_name = "aaa")
    Condition
      Error:
      ! `aaa` must be 'numeric', not 'character'.

# check_type allows extra arguments for `predicate`

    Code
      check_type(1, is_geq, min_val = 2)
    Condition
      Error:
      ! `1` must be 'geq', not 'double'.

# check_type allows formula `predicate`

    Code
      check_type("a", ~ is.numeric(.))
    Condition
      Error:
      ! `"a"` must be '~is.numeric(.)', not 'character'.

# hypothesize errors out when x isn't a dataframe

    Code
      hypothesize(c(1, 2, 3), null = "point")
    Condition
      Error in `hypothesize()`:
      ! x must be a data.frame or tibble


================================================
FILE: tests/testthat/_snaps/visualize.md
================================================
# visualize warns with bad arguments

    Code
      res_ <- visualize(calculate(generate(hypothesize(specify(gss_tbl, age ~ hours),
      null = "independence"), reps = 100, type = "permute"), stat = "slope"),
      obs_stat = obs_slope, direction = "right")
    Condition
      Warning:
      The arguments `c("obs_stat", "direction")` are deprecated in `visualize()` and will be ignored. They should now be passed to one of `shade_p_value()` or `shade_confidence_interval()`.

---

    Code
      res_ <- visualize(calculate(generate(hypothesize(specify(gss_tbl, age ~ hours),
      null = "independence"), reps = 100, type = "permute"), stat = "slope"),
      obs_stat = obs_slope)
    Condition
      Warning:
      The arguments `obs_stat` are deprecated in `visualize()` and will be ignored. They should now be passed to one of `shade_p_value()` or `shade_confidence_interval()`.

---

    Code
      res_ <- visualize(calculate(generate(hypothesize(specify(gss_tbl, age ~ hours),
      null = "independence"), reps = 100, type = "permute"), stat = "slope"),
      endpoints = c(0.01, 0.02))
    Condition
      Warning:
      The arguments `endpoints` are deprecated in `visualize()` and will be ignored. They should now be passed to one of `shade_p_value()` or `shade_confidence_interval()`.

---

    Code
      res <- visualize(age_hours_df, endpoints = c(0.01, 0.02))
    Condition
      Warning:
      The arguments `endpoints` are deprecated in `visualize()` and will be ignored. They should now be passed to one of `shade_p_value()` or `shade_confidence_interval()`.

# visualize basic tests

    Code
      visualize(hours_resamp, bins = "yep")
    Condition
      Error in `visualize()`:
      ! `bins` must be 'numeric', not 'character'.

---

    argument "obs_stat" is missing, with no default

---

    Code
      res_vis_theor_none_1 <- visualize(calculate(hypothesize(specify(gss_tbl, sex ~
        college, success = "female"), null = "independence"), stat = "z", order = c(
        "no degree", "degree")), method = "theoretical")
    Condition
      Warning:
      Check to make sure the conditions have been met for the theoretical method. infer currently does not check these for you.

---

    Code
      visualize(calculate(generate(hypothesize(specify(gss_tbl, sex ~ college,
      success = "female"), null = "independence"), reps = 100, type = "permute"),
      stat = "diff in props", order = c("no degree", "degree")), method = "both") +
        shade_p_value(direction = "both", obs_stat = obs_diff)
    Condition
      Warning:
      Check to make sure the conditions have been met for the theoretical method. infer currently does not check these for you.
      Error in `theoretical_layer()`:
      ! Your `calculate`d statistic and the theoretical distribution are on different scales. Use a standardized `stat` instead.

---

    Code
      visualize(hypothesize(specify(gss_tbl, partyid ~ NULL), null = "point", p = c(
        dem = 0.4, rep = 0.4, ind = 0.2)), method = "traditional")
    Condition
      Error in `visualize()`:
      ! Provide `method` with one of three options: `"theoretical"`, `"both"`, or `"simulation"`. `"simulation"` is the default for simulation-based null distributions, while `"theoretical"` is the only option for null distributions outputted by `assume()`.

---

    Code
      visualize(calculate(generate(hypothesize(specify(gss_tbl, hours ~ sex), null = "independence"),
      reps = 100, type = "permute"), stat = "diff in means", order = c("female",
        "male")), method = "both") + shade_p_value(direction = "both", obs_stat = obs_diff_mean)
    Condition
      Warning:
      Check to make sure the conditions have been met for the theoretical method. infer currently does not check these for you.
      Error in `theoretical_layer()`:
      ! Your `calculate`d statistic and the theoretical distribution are on different scales. Use a standardized `stat` instead.

---

    Code
      res_vis_theor_both_1 <- visualize(calculate(generate(hypothesize(specify(
        gss_tbl, hours ~ sex), null = "independence"), reps = 100, type = "permute"),
      stat = "diff in means", order = c("female", "male")), method = "theoretical") +
        shade_p_value(direction = "both", obs_stat = obs_diff_mean)
    Condition
      Warning:
      Check to make sure the conditions have been met for the theoretical method. infer currently does not check these for you.
      Warning:
      Your `calculate`d statistic and the theoretical distribution are on different scales. Displaying only the theoretical distribution.

# method = "both" behaves nicely

    Code
      visualize(generate(hypothesize(specify(gss_tbl, hours ~ NULL), null = "point",
      mu = 4), reps = 100, type = "bootstrap"), method = "both")
    Condition
      Error in `visualize()`:
      ! `generate()` and `calculate()` are both required to be done prior to `visualize(method = "both")`

---

    Code
      res_method_both <- visualize(calculate(generate(hypothesize(specify(gss_tbl,
        hours ~ college), null = "point", mu = 4), reps = 10, type = "bootstrap"),
      stat = "t", order = c("no degree", "degree")), method = "both")
    Condition
      Warning:
      With only 10 replicates, it may be difficult to see the relationship between simulation and theory.
      Warning:
      Check to make sure the conditions have been met for the theoretical method. infer currently does not check these for you.

# Traditional right-tailed tests have warning if not right-tailed

    Code
      res_ <- visualize(calculate(generate(hypothesize(specify(gss_tbl, sex ~ partyid,
      success = "female"), null = "independence"), reps = 100, type = "permute"),
      stat = "Chisq"), method = "both") + shade_p_value(obs_stat = 2, direction = "left")
    Condition
      Warning:
      Check to make sure the conditions have been met for the theoretical method. infer currently does not check these for you.

---

    Code
      res_ <- visualize(calculate(generate(hypothesize(specify(gss_tbl, age ~ partyid),
      null = "independence"), reps = 100, type = "permute"), stat = "F"), method = "both") +
        shade_p_value(obs_stat = 2, direction = "two_sided")
    Condition
      Warning:
      Check to make sure the conditions have been met for the theoretical method. infer currently does not check these for you.

---

    Code
      res_ <- visualize(calculate(hypothesize(specify(gss_tbl, sex ~ partyid,
      success = "female"), null = "independence"), stat = "Chisq"), method = "theoretical") +
        shade_p_value(obs_stat = 2, direction = "left")
    Message
      Rather than setting `method = "theoretical"` with a simulation-based null distribution, the preferred method for visualizing theory-based distributions with infer is now to pass the output of `assume()` as the first argument to `visualize()`.
    Condition
      Warning:
      Check to make sure the conditions have been met for the theoretical method. infer currently does not check these for you.

---

    Code
      res_ <- visualize(calculate(hypothesize(specify(gss_tbl, age ~ partyid), null = "independence"),
      stat = "F"), method = "theoretical") + shade_p_value(obs_stat = 2, direction = "two_sided")
    Message
      Rather than setting `method = "theoretical"` with a simulation-based null distribution, the preferred method for visualizing theory-based distributions with infer is now to pass the output of `assume()` as the first argument to `visualize()`.
    Condition
      Warning:
      Check to make sure the conditions have been met for the theoretical method. infer currently does not check these for you.

# confidence interval plots are working

    Code
      res_ <- visualize(gss_tbl_boot) + shade_confidence_interval(endpoints = df_error)
    Condition
      Error in `shade_confidence_interval()`:
      ! Expecting `endpoints` to be a 1 x 2 data frame or 2 element vector.

---

    Code
      res_ <- visualize(gss_tbl_boot) + shade_confidence_interval(endpoints = vec_error)
    Condition
      Warning:
      Expecting `endpoints` to be a 1 x 2 data frame or 2 element vector. Using the first two entries as the `endpoints`.

---

    Code
      res_ci_vis <- visualize(gss_tbl_boot) + shade_confidence_interval(endpoints = perc_ci,
        direction = "between")
    Condition
      Warning:
      Ignoring unknown parameters: `direction`
      Warning:
      Ignoring unknown parameters: `direction`

# title adapts to not hypothesis testing workflow

    Code
      res_vis_no_hypothesize_both <- visualize(calculate(gss_tbl_boot_tbl, stat = "t"),
      method = "both")
    Condition
      Warning:
      A t statistic requires a null hypothesis to calculate the observed statistic.
      Output assumes the following null value: `mu = 0`.
      Warning:
      Check to make sure the conditions have been met for the theoretical method. infer currently does not check these for you.

# warn_right_tail_test works

    Code
      warn_right_tail_test("left", stat_name)
    Condition
      Warning:
      F usually corresponds to right-tailed tests. Proceed with caution.
    Output
      [1] TRUE

---

    Code
      warn_right_tail_test("two_sided", stat_name)
    Condition
      Warning:
      F usually corresponds to right-tailed tests. Proceed with caution.
    Output
      [1] TRUE

---

    Code
      warn_right_tail_test("left", stat_name)
    Condition
      Warning:
      Chi-Square usually corresponds to right-tailed tests. Proceed with caution.
    Output
      [1] TRUE

---

    Code
      warn_right_tail_test("two_sided", stat_name)
    Condition
      Warning:
      Chi-Square usually corresponds to right-tailed tests. Proceed with caution.
    Output
      [1] TRUE

# visualize warns about removing `NaN`

    Code
      res_ <- visualize(dist)
    Condition
      Warning:
      1 calculated statistic was `NaN`. `NaN`s have been omitted from visualization.
      i See `calculate()` (`?infer::calculate()`) for more details.

---

    Code
      res_ <- visualize(dist)
    Condition
      Warning:
      2 calculated statistics were `NaN`. `NaN`s have been omitted from visualization.
      i See `calculate()` (`?infer::calculate()`) for more details.

---

    Code
      res_ <- visualize(dist)
    Condition
      Error:
      ! All calculated statistics were `NaN`.
      i See `calculate()` (`?infer::calculate()`) for more details.

# visualize can handle multiple explanatory variables

    Code
      res_viz_fit_p_val_right <- visualize(null_fits) + shade_p_value(obs_stat = obs_fit,
        direction = "right")

# visualize can handle `assume()` output

    Code
      res_viz_assume_t_sim <- visualize(null_dist, method = "simulation")
    Condition
      Warning:
      Simulation-based visualization methods are not well-defined for `assume()` output; the `method` argument will be ignored.
      i Set `method = "theoretical"` to silence this message.

---

    Code
      res_viz_assume_t_both <- visualize(null_dist, method = "both")
    Condition
      Warning:
      Simulation-based visualization methods are not well-defined for `assume()` output; the `method` argument will be ignored.
      i Set `method = "theoretical"` to silence this message.


================================================
FILE: tests/testthat/_snaps/wrappers.md
================================================
# t_test works

    Code
      res_ <- t_test(gss_tbl, hours ~ sex)
    Condition
      Warning:
      The statistic is based on a difference or ratio; by default, for difference-based statistics, the explanatory variable is subtracted in the order "male" - "female", or divided in the order "male" / "female" for ratio-based statistics. To specify this order yourself, supply `order = c("male", "female")`.

---

    Code
      t_test(gss_tbl, response = "hours", explanatory = "sex")
    Condition
      Error in `t_test()`:
      ! The response should be a bare variable name (not a string in quotation marks).

# chisq_test works

    Code
      chisq_test(x = gss_tbl, response = age, explanatory = partyid)
    Condition
      Error in `chisq_test()`:
      ! The response variable of `age` is not appropriate since the response variable is expected to be categorical.

---

    Code
      chisq_test(x = gss_tbl, response = partyid, explanatory = age)
    Condition
      Error in `chisq_test()`:
      ! The explanatory variable of `age` is not appropriate since the explanatory variable is expected to be categorical.

# _stat functions work

    Code
      res_ <- chisq_stat(gss_tbl, college ~ partyid)
    Condition
      Warning:
      `chisq_stat()` was deprecated in infer 1.0.0.
      i Please use `observe()` instead.

---

    Code
      obs_stat_way <- chisq_stat(gss_tbl, college ~ partyid)
    Condition
      Warning:
      `chisq_stat()` was deprecated in infer 1.0.0.
      i Please use `observe()` instead.

---

    Code
      obs_stat_way <- chisq_stat(gss_tbl, partyid ~ NULL)
    Condition
      Warning:
      `chisq_stat()` was deprecated in infer 1.0.0.
      i Please use `observe()` instead.

---

    Code
      obs_stat_way_alt <- chisq_stat(gss_tbl, response = partyid)
    Condition
      Warning:
      `chisq_stat()` was deprecated in infer 1.0.0.
      i Please use `observe()` instead.

---

    Code
      res_ <- t_stat(gss_tbl, hours ~ sex, order = c("male", "female"))
    Condition
      Warning:
      `t_stat()` was deprecated in infer 1.0.0.
      i Please use `observe()` instead.

---

    Code
      obs_stat_way <- t_stat(gss_tbl, hours ~ sex, order = c("male", "female"))
    Condition
      Warning:
      `t_stat()` was deprecated in infer 1.0.0.
      i Please use `observe()` instead.

---

    Code
      obs_stat_way_alt <- t_stat(gss_tbl, response = hours, explanatory = sex, order = c(
        "male", "female"))
    Condition
      Warning:
      `t_stat()` was deprecated in infer 1.0.0.
      i Please use `observe()` instead.

---

    Code
      res_ <- t_stat(gss_tbl, hours ~ NULL)
    Condition
      Warning:
      `t_stat()` was deprecated in infer 1.0.0.
      i Please use `observe()` instead.

---

    Code
      obs_stat_way <- t_stat(gss_tbl, hours ~ NULL)
    Condition
      Warning:
      `t_stat()` was deprecated in infer 1.0.0.
      i Please use `observe()` instead.

---

    Code
      obs_stat_way_alt <- t_stat(gss_tbl, response = hours)
    Condition
      Warning:
      `t_stat()` was deprecated in infer 1.0.0.
      i Please use `observe()` instead.

---

    Code
      res_ <- chisq_stat(x = gss_tbl, response = age, explanatory = sex)
    Condition
      Warning:
      `chisq_stat()` was deprecated in infer 1.0.0.
      i Please use `observe()` instead.
      Error in `chisq_stat()`:
      ! The response variable of `age` is not appropriate since the response variable is expected to be categorical.

---

    Code
      res_ <- chisq_stat(x = gss_tbl, response = sex, explanatory = age)
    Condition
      Warning:
      `chisq_stat()` was deprecated in infer 1.0.0.
      i Please use `observe()` instead.
      Error in `chisq_stat()`:
      ! The explanatory variable of `age` is not appropriate since the response variable is expected to be categorical.

# conf_int argument works

    Code
      res_ <- t_test(gss_tbl, hours ~ sex, order = c("female", "male"), conf_int = TRUE,
      conf_level = 1.1)
    Condition
      Error in `t_test()`:
      ! The `conf_level` argument must be a number between 0 and 1.

---

    Code
      no_var_equal <- t_stat(gss_tbl_small, hours ~ sex, order = c("female", "male"))
    Condition
      Warning:
      `t_stat()` was deprecated in infer 1.0.0.
      i Please use `observe()` instead.

---

    Code
      var_equal <- t_stat(gss_tbl_small, hours ~ sex, order = c("female", "male"),
      var.equal = TRUE)
    Condition
      Warning:
      `t_stat()` was deprecated in infer 1.0.0.
      i Please use `observe()` instead.

# two sample prop_test works

    Code
      res_ <- prop_test(df, resp ~ exp)
    Condition
      Warning:
      The statistic is based on a difference or ratio; by default, for difference-based statistics, the explanatory variable is subtracted in the order "a" - "b", or divided in the order "a" / "b" for ratio-based statistics. To specify this order yourself, supply `order = c("a", "b")`.

---

    Code
      res_ <- prop_test(bad_df, resp ~ exp)
    Condition
      Warning in `anova.lm()`:
      ANOVA F-tests on an essentially perfect fit are unreliable
      Error in `prop_test()`:
      ! The response variable of `resp` is not appropriate since the response variable is expected to be categorical.

---

    Code
      res_ <- prop_test(bad_df2, resp ~ exp)
    Condition
      Error in `prop_test()`:
      ! The explanatory variable of `exp` is not appropriate since the explanatory variable is expected to be categorical.

# one sample prop_test works

    Code
      res_ <- prop_test(df_1, resp ~ NULL)
    Message
      No `p` argument was hypothesized, so the test will assume a null hypothesis `p = .5`.

---

    Code
      res_ <- prop_test(df_1, resp ~ NULL, p = 0.2, success = "b")
    Condition
      Error in `prop_test()`:
      ! b is not a valid level of resp.

# prop_test handles >2 explanatory levels gracefully

    Code
      res_2 <- prop_test(dfr, resp ~ exp, order = c("a", "b"))
    Condition
      Warning:
      The `order` argument will be ignored as it is not well-defined for explanatory variables with more than 2 levels.
      i To silence this message, avoid passing the `order` argument.

---

    Code
      res_3 <- prop_test(dfr, resp ~ exp, order = c("a", "b", "c"))
    Condition
      Warning:
      The `order` argument will be ignored as it is not well-defined for explanatory variables with more than 2 levels.
      i To silence this message, avoid passing the `order` argument.

# prop_test errors with >2 response levels

    Code
      res_1 <- prop_test(dfr, resp ~ exp)
    Condition
      Error in `prop_test()`:
      ! This test is not defined for response variables with more than 2 levels.

# wrappers can handled ordered factors

    Code
      ordered_t_1 <- chisq_test(dplyr::mutate(gss_tbl, income = factor(income,
        ordered = TRUE)), income ~ partyid)
    Condition
      Warning in `stats::chisq.test()`:
      Chi-squared approximation may be incorrect

---

    Code
      ordered_f_1 <- chisq_test(dplyr::mutate(gss_tbl, income = factor(income,
        ordered = FALSE)), income ~ partyid)
    Condition
      Warning in `stats::chisq.test()`:
      Chi-squared approximation may be incorrect

---

    Code
      ordered_t_2 <- chisq_test(dplyr::mutate(gss_tbl, income = factor(income,
        ordered = TRUE)), partyid ~ income)
    Condition
      Warning in `stats::chisq.test()`:
      Chi-squared approximation may be incorrect

---

    Code
      ordered_f_2 <- chisq_test(dplyr::mutate(gss_tbl, income = factor(income,
        ordered = FALSE)), partyid ~ income)
    Condition
      Warning in `stats::chisq.test()`:
      Chi-squared approximation may be incorrect


================================================
FILE: tests/testthat/helper-data.R
================================================
set.seed(4242)

expect_doppelganger <- function(title, fig, ...) {
  testthat::skip_if_not_installed("vdiffr")
  vdiffr::expect_doppelganger(title, fig, ...)
}

eps <- if (capabilities("long.double")) {
  sqrt(.Machine$double.eps)
} else {
  0.01
}

gss_tbl <- tibble::as_tibble(gss) |>
  dplyr::filter(!(is.na(sex) | is.na(college))) |>
  dplyr::mutate(partyid = as.character(partyid)) |>
  dplyr::filter(partyid %in% c("ind", "rep", "dem"))

gss_calc <- gss_tbl |>
  specify(college ~ sex, success = "no degree") |>
  hypothesize(null = "independence") |>
  generate(reps = 1000, type = "permute") |>
  calculate(stat = "diff in props", order = c("female", "male"))

mtcars_df <- mtcars |>
  dplyr::mutate(
    cyl = factor(cyl),
    vs = factor(vs),
    am = factor(am),
    gear = factor(gear),
    carb = factor(carb)
  )

obs_diff <- gss_tbl |>
  specify(college ~ sex, success = "no degree") |>
  calculate(stat = "diff in props", order = c("female", "male"))

set.seed(2018)
test_df <- tibble::tibble(stat = rnorm(100))

# Data for visualization tests

gss_permute <- gss_tbl |>
  specify(college ~ sex, success = "no degree") |>
  hypothesize(null = "independence") |>
  generate(reps = 100, type = "permute") |>
  calculate(stat = "z", order = c("female", "male"))

gss_viz_sim <- gss_permute |> visualize(method = "simulation")

# Warnings are about checking conditions for the theoretical method.
gss_viz_theor <- suppressWarnings(suppressMessages(
  gss_permute |> visualize(method = "theoretical")
))
gss_viz_both <- suppressWarnings(
  gss_permute |> visualize(method = "both")
)


================================================
FILE: tests/testthat/setup.R
================================================
withr::local_envvar(SUPPRESS_INFER_MESSAGES = "true", .local_envir = teardown_env())


================================================
FILE: tests/testthat/test-aliases.R
================================================
test_that("aliases work", {
  expect_equal(
    gss_calc |>
      get_pvalue(obs_stat = -0.2, direction = "right") |>
      dplyr::pull(),
    expected = 1,
    tolerance = eps
  )

  expect_silent(gss_permute |> get_ci())
})

test_that("old aliases produce informative error", {
  expect_snapshot(
    error = TRUE,
    res <- gss_calc |>
      p_value(obs_stat = -0.2, direction = "right")
  )

  expect_snapshot(
    error = TRUE,
    res_ <- gss_permute |> conf_int()
  )
})


================================================
FILE: tests/testthat/test-assume.R
================================================
test_that("distribution description works as expected", {
  # extract the "first element" to convert to character
  assume_ <- function(...) {
    assume(...)[1]
  }

  expect_equal(
    gss |>
      specify(age ~ partyid) |>
      hypothesize(null = "independence") |>
      assume_(
        distribution = "F",
        df = c(length(unique(gss$partyid)) - 1, nrow(gss) - 4)
      ),
    "An F distribution with 3 and 496 degrees of freedom."
  )

  expect_equal(
    gss |>
      specify(age ~ partyid) |>
      hypothesize(null = "independence") |>
      assume_(
        distribution = "F",
        df = c(length(unique(gss$partyid)) - 1, nrow(gss) - 4)
      ),
    gss |>
      specify(age ~ partyid) |>
      hypothesize(null = "independence") |>
      assume_(distribution = "F")
  )

  expect_equal(
    gss |>
      specify(response = finrela) |>
      hypothesize(
        null = "point",
        p = c(
          "far below average" = 1 / 6,
          "below average" = 1 / 6,
          "average" = 1 / 6,
          "above average" = 1 / 6,
          "far above average" = 1 / 6,
          "DK" = 1 / 6
        )
      ) |>
      assume_("Chisq", length(unique(gss$finrela)) - 1),
    "A Chi-squared distribution with 5 degrees of freedom."
  )

  expect_equal(
    gss |>
      specify(response = finrela) |>
      hypothesize(
        null = "point",
        p = c(
          "far below average" = 1 / 6,
          "below average" = 1 / 6,
          "average" = 1 / 6,
          "above average" = 1 / 6,
          "far above average" = 1 / 6,
          "DK" = 1 / 6
        )
      ) |>
      assume_("Chisq"),
    "A Chi-squared distribution with 5 degrees of freedom."
  )

  expect_equal(
    gss |>
      specify(formula = finrela ~ sex) |>
      hypothesize(null = "independence") |>
      assume_(
        distribution = "Chisq",
        df = (length(unique(gss$finrela)) - 1) *
          (length(unique(gss$sex)) - 1)
      ),
    "A Chi-squared distribution with 5 degrees of freedom."
  )

  expect_equal(
    gss |>
      specify(formula = finrela ~ sex) |>
      hypothesize(null = "independence") |>
      assume_(distribution = "Chisq"),
    "A Chi-squared distribution with 5 degrees of freedom."
  )

  expect_equal(
    gss |>
      specify(age ~ college) |>
      hypothesize(null = "independence") |>
      assume_("t"),
    "A T distribution with 423 degrees of freedom."
  )

  expect_equal(
    gss |>
      specify(response = sex, success = "female") |>
      hypothesize(null = "point", p = .5) |>
      assume_("z"),
    "A Z distribution."
  )
})

test_that("assume errors with bad arguments", {
  # supply a bad distribution
  expect_snapshot(
    error = TRUE,
    gss |>
      specify(age ~ college) |>
      hypothesize(null = "independence") |>
      assume("boop", nrow(gss) - 1)
  )

  # bad number of df arguments
  expect_snapshot(
    error = TRUE,
    gss |>
      specify(age ~ college) |>
      hypothesize(null = "independence") |>
      assume("t", c(nrow(gss) - 1, 2))
  )

  expect_snapshot(
    error = TRUE,
    gss |>
      specify(age ~ partyid) |>
      hypothesize(null = "independence") |>
      assume("F", nrow(gss) - 1)
  )

  # bad df argument type
  expect_snapshot(
    error = TRUE,
    gss |>
      specify(age ~ partyid) |>
      hypothesize(null = "independence") |>
      assume("F", "boop")
  )

  # df argument possibly passed to dots
  expect_snapshot(
    error = TRUE,
    gss |>
      specify(age ~ partyid) |>
      hypothesize(null = "independence") |>
      assume("F", nrow(gss) - 1, 1)
  )

  expect_snapshot(
    error = TRUE,
    gss |>
      specify(age ~ partyid) |>
      hypothesize(null = "independence") |>
      assume("F", nrow(gss) - 1, 1, 2)
  )

  # supply `distribution`s that don't align with the supplied variables
  expect_snapshot(
    error = TRUE,
    gss |>
      specify(age ~ finrela) |>
      hypothesize(null = "independence") |>
      assume("t", nrow(gss) - 1)
  )

  expect_snapshot(
    error = TRUE,
    gss |>
      specify(age ~ finrela) |>
      hypothesize(null = "independence") |>
      assume("z", nrow(gss) - 1)
  )

  expect_snapshot(
    error = TRUE,
    gss |>
      specify(age ~ NULL) |>
      hypothesize(null = "point", mu = 40) |>
      assume("z", nrow(gss) - 1)
  )

  # supply bad `x` arguments
  expect_snapshot(
    error = TRUE,
    gss |>
      assume("z", nrow(gss) - 1)
  )

  expect_snapshot(
    error = TRUE,
    "boop" |>
      assume("z", nrow(gss) - 1)
  )
})

test_that("assume() handles automatic df gracefully", {
  expect_equal(
    expect_silent(
      gss |>
        specify(response = hours) |>
        hypothesize(null = "point", mu = 40) |>
        assume("t")
    ),
    expect_silent(
      gss |>
        specify(response = hours) |>
        hypothesize(null = "point", mu = 40) |>
        assume("t")
    )
  )

  expect_snapshot(
    res_ <- gss |>
      specify(response = hours) |>
      hypothesize(null = "point", mu = 40) |>
      assume("t", nrow(gss) - 2)
  )

  # t.test param with var.equal = FALSE
  expect_equal(
    expect_silent(
      gss |>
        specify(age ~ college) |>
        hypothesize(null = "independence") |>
        assume(distribution = "t", 423) |>
        attr("df")
    ),
    423
  )

  # t.test param with var.equal = TRUE
  expect_equal(
    expect_silent(
      gss |>
        specify(age ~ college) |>
        hypothesize(null = "independence") |>
        assume(distribution = "t", 498) |>
        attr("df")
    ),
    498
  )

  # min(n1 - 1, n2 - 1)
  expect_equal(
    expect_silent(
      gss |>
        specify(age ~ college) |>
        hypothesize(null = "independence") |>
        assume(distribution = "t", 173) |>
        attr("df")
    ),
    173
  )

  # n1 + n2 - 2
  expect_equal(
    expect_silent(
      gss |>
        specify(age ~ college) |>
        hypothesize(null = "independence") |>
        assume(distribution = "t", 498) |>
        attr("df")
    ),
    498
  )
})

test_that("assume() brings along supplied arguments", {
  t_dist <- gss |>
    specify(age ~ college) |>
    hypothesize(null = "independence") |>
    assume("t")

  expect_equal(
    round(attr(t_dist, "df")),
    423
  )

  expect_equal(
    attr(t_dist, "distribution"),
    "t"
  )

  expect_equal(
    attr(t_dist, "theory_type"),
    "Two sample t"
  )

  expect_equal(
    attr(t_dist, "df"),
    attr(t_dist, "distr_param")
  )

  f_dist <- gss |>
    specify(age ~ partyid) |>
    hypothesize(null = "independence") |>
    assume(distribution = "F")

  expect_equal(
    attr(f_dist, "df"),
    c(attr(f_dist, "distr_param"), attr(f_dist, "distr_param2"))
  )
})

test_that("process_df works", {
  expect_equal(
    process_df(1),
    list(df = 1)
  )

  expect_equal(
    process_df(c(1, 2)),
    list(df1 = 1, df2 = 2)
  )

  expect_equal(
    process_df(NULL),
    list()
  )
})


================================================
FILE: tests/testthat/test-calculate.R
================================================
# calculate arguments
test_that("x is a tibble", {
  vec <- 1:10
  expect_snapshot(error = TRUE, calculate(vec, stat = "mean"))
})

test_that("calculate checks `stat` argument", {
  # stat is a string
  expect_snapshot(error = TRUE, calculate(gss_tbl, stat = 3))

  # stat is one of the implemented options with informative error
  gen_gss_slope <- gss_tbl |>
    specify(hours ~ age) |>
    hypothesize(null = "independence") |>
    generate(reps = 10, type = "permute")

  expect_snapshot(error = TRUE, calculate(gen_gss_slope, stat = "slopee"))
  expect_snapshot(error = TRUE, calculate(gen_gss_slope, stat = "stdev"))
  expect_snapshot(error = TRUE, calculate(gen_gss_slope, stat = "stat"))
  expect_snapshot(error = TRUE, calculate(gen_gss_slope, stat = "chi sq"))

  # stat can be one of the allowed aliases
  chisq_df <- gss |> specify(formula = finrela ~ sex)
  expect_equal(
    calculate(chisq_df, stat = "Chisq")[["stat"]],
    calculate(chisq_df, stat = "chisq")[["stat"]]
  )

  f_df <- gss |> specify(age ~ partyid)
  expect_equal(
    calculate(f_df, stat = "F")[["stat"]],
    calculate(f_df, stat = "f")[["stat"]]
  )
})

test_that("errors informatively with incompatible stat vs hypothesis", {
  expect_snapshot(
    error = TRUE,
    gss |>
      specify(college ~ sex, success = "degree") |>
      hypothesise(null = "point", p = .40) |>
      calculate(stat = "diff in props", order = c("female", "male"))
  )

  expect_snapshot(
    error = TRUE,
    gss |>
      specify(college ~ sex, success = "degree") |>
      hypothesise(null = "point", p = .40) |>
      generate(reps = 10, type = "draw") |>
      calculate(stat = "diff in props", order = c("female", "male"))
  )

  expect_silent(
    gss |>
      specify(hours ~ college) |>
      hypothesize(null = "point", mu = 40) |>
      calculate(stat = "t", order = c("degree", "no degree"))
  )

  expect_silent(
    gss |>
      specify(response = finrela) |>
      hypothesize(
        null = "point",
        p = c(
          "far below average" = 1 / 6,
          "below average" = 1 / 6,
          "average" = 1 / 6,
          "above average" = 1 / 6,
          "far above average" = 1 / 6,
          "DK" = 1 / 6
        )
      ) |>
      calculate(stat = "Chisq")
  )
})

test_that("response attribute has been set", {
  expect_snapshot(
    error = TRUE,
    tibble::as_tibble(gss) |> calculate(stat = "median")
  )
})

test_that("variable chosen is of appropriate class (one var problems)", {
  # One sample chisq example
  gen_gss1 <- gss_tbl |>
    specify(partyid ~ NULL) |>
    hypothesize(
      null = "point",
      p = c("dem" = .5, "rep" = .25, "ind" = .25)
    ) |>
    generate(reps = 10, type = "draw")
  expect_snapshot(error = TRUE, calculate(gen_gss1, stat = "mean"))

  # One mean example
  gen_gss_num <- gss_tbl |>
    specify(hours ~ NULL) |>
    hypothesize(null = "point", mu = 40) |>
    generate(reps = 10, type = "bootstrap")
  expect_snapshot(error = TRUE, calculate(gen_gss_num, stat = "prop"))
  expect_silent(calculate(gen_gss_num, stat = "mean"))
  expect_snapshot(error = TRUE, calculate(gen_gss_num, stat = "median"))
  expect_snapshot(error = TRUE, calculate(gen_gss_num, stat = "sd"))

  gen_gss_num2 <- gss_tbl |>
    specify(hours ~ NULL) |>
    hypothesize(null = "point", med = 40) |>
    generate(reps = 10, type = "bootstrap")
  expect_snapshot(error = TRUE, calculate(gen_gss_num2, stat = "prop"))
  expect_snapshot(error = TRUE, calculate(gen_gss_num2, stat = "mean"))
  expect_silent(calculate(gen_gss_num2, stat = "median"))
  expect_snapshot(error = TRUE, calculate(gen_gss_num2, stat = "sd"))

  gen_gss_num3 <- gss_tbl |>
    specify(hours ~ NULL) |>
    hypothesize(null = "point", sigma = 0.6) |>
    generate(reps = 10, type = "bootstrap")
  expect_snapshot(error = TRUE, calculate(gen_gss_num3, stat = "prop"))
  expect_snapshot(error = TRUE, calculate(gen_gss_num3, stat = "mean"))
  expect_snapshot(error = TRUE, calculate(gen_gss_num3, stat = "median"))
  expect_silent(calculate(gen_gss_num3, stat = "sd"))
})

test_that("grouping (explanatory) variable is a factor (two var problems)", {
  gen_gss2 <- gss_tbl |>
    specify(hours ~ age) |>
    hypothesize(null = "independence") |>
    generate(reps = 10, type = "permute")
  expect_snapshot(error = TRUE, calculate(gen_gss2, stat = "diff in means"))
  expect_snapshot(error = TRUE, calculate(gen_gss2, stat = "diff in medians"))
  # Since shifts to "Slope with t"
  ## Not implemented
  # expect_silent(calculate(gen_gss2, stat = "t"))
})

test_that("grouping (explanatory) variable is numeric (two var problems)", {
  gen_gss2a <- gss_tbl |>
    specify(partyid ~ hours) |>
    hypothesize(null = "independence") |>
    generate(reps = 10, type = "permute")
  expect_snapshot(error = TRUE, calculate(gen_gss2a, stat = "slope"))
  # Since shifts to "Slope with t"
  expect_snapshot(error = TRUE, calculate(gen_gss2a, stat = "t"))
  expect_snapshot(error = TRUE, calculate(gen_gss2a, stat = "diff in medians"))
})

test_that("response variable is a factor (two var problems)", {
  gen_gss3 <- gss_tbl |>
    specify(hours ~ partyid) |>
    hypothesize(null = "independence") |>
    generate(reps = 10, type = "permute")
  expect_snapshot(error = TRUE, calculate(gen_gss3, stat = "Chisq"))

  # explanatory has more than 2 levels
  gen_gss4 <- gss_tbl |>
    specify(sex ~ partyid, success = "female") |>
    hypothesize(null = "independence") |>
    generate(reps = 10, type = "permute")
  expect_snapshot(error = TRUE, calculate(gen_gss4, stat = "diff in props"))
  expect_snapshot(error = TRUE, calculate(gen_gss4, stat = "ratio of props"))
  expect_snapshot(error = TRUE, calculate(gen_gss4, stat = "odds ratio"))

  expect_snapshot(error = TRUE, calculate(gen_gss4, stat = "t"))

  # Check successful diff in props
  gen_gss4a <- gss_tbl |>
    specify(college ~ sex, success = "no degree") |>
    hypothesize(null = "independence") |>
    generate(reps = 10, type = "permute")
  expect_silent(
    calculate(gen_gss4a, stat = "diff in props", order = c("female", "male"))
  )
  expect_silent(
    calculate(gen_gss4a, stat = "ratio of props", order = c("female", "male"))
  )
  expect_silent(
    calculate(gen_gss4a, stat = "odds ratio", order = c("female", "male"))
  )
  expect_silent(
    calculate(gen_gss4a, stat = "z", order = c("female", "male"))
  )
  expect_snapshot(res_ <- calculate(gen_gss4a, stat = "z"))
})

gen_gss5 <- gss_tbl |>
  specify(partyid ~ hours) |>
  generate(reps = 10, type = "bootstrap")

test_that("response variable is numeric (two var problems)", {
  expect_snapshot(error = TRUE, calculate(gen_gss5, stat = "F"))
})

test_that("two sample mean-type problems are working", {
  gen_gss5a <- gss_tbl |>
    specify(hours ~ college) |>
    hypothesize(null = "independence") |>
    generate(reps = 10, type = "permute")
  expect_snapshot(res_ <- calculate(gen_gss5a, stat = "diff in means"))
  expect_silent(
    calculate(
      gen_gss5a,
      stat = "diff in means",
      order = c("no degree", "degree")
    )
  )
  expect_snapshot(res_ <- calculate(gen_gss5a, stat = "t"))
  expect_silent(calculate(
    gen_gss5a,
    stat = "t",
    order = c("no degree", "degree")
  ))
})

test_that("properties of tibble passed-in are correct", {
  expect_s3_class(gen_gss5, "grouped_df")
  expect_equal(ncol(gen_gss5), 3)

  gen_gss6 <- gss_tbl |>
    specify(hours ~ NULL) |>
    generate(reps = 10)
  expect_equal(ncol(gen_gss6), 2)
  expect_snapshot(error = TRUE, calculate(gen_gss6))
})

test_that("order is working for diff in means", {
  gen_gss7 <- gss_tbl |>
    specify(hours ~ college) |>
    hypothesize(null = "independence") |>
    generate(reps = 10, type = "permute")
  expect_equal(
    nrow(calculate(
      gen_gss7,
      stat = "diff in means",
      order = c("no degree", "degree")
    )),
    10
  )
  expect_equal(
    ncol(calculate(
      gen_gss7,
      stat = "diff in means",
      order = c("no degree", "degree")
    )),
    2
  )
})

test_that("chi-square matches chisq.test value", {
  gen_gss8 <- gss_tbl |>
    specify(sex ~ partyid, success = "female") |>
    hypothesize(null = "independence") |>
    generate(reps = 10, type = "permute")
  infer_way <- calculate(gen_gss8, stat = "Chisq")
  # chisq.test way
  suppressWarnings(
    trad_way <- gen_gss8 |>
      dplyr::group_by(replicate) |>
      dplyr::do(broom::tidy(
        stats::chisq.test(table(.$sex, .$partyid))
      )) |>
      dplyr::ungroup() |>
      dplyr::select(replicate, stat = statistic)
  )
  # Equal not including attributes
  expect_equal(infer_way, trad_way, ignore_attr = TRUE)

  gen_gss9 <- gss_tbl |>
    specify(partyid ~ NULL) |>
    hypothesize(
      null = "point",
      p = c("dem" = 1 / 3, "rep" = 1 / 3, "ind" = 1 / 3)
    ) |>
    generate(reps = 10, type = "draw")
  infer_way <- calculate(gen_gss9, stat = "Chisq")
  # chisq.test way
  trad_way <- gen_gss9 |>
    dplyr::group_by(replicate) |>
    dplyr::do(broom::tidy(
      stats::chisq.test(table(.$partyid))
    )) |>
    dplyr::select(replicate, stat = statistic)
  expect_equal(infer_way, trad_way, ignore_attr = TRUE)

  gen_gss9a <- gss_tbl |>
    specify(partyid ~ NULL) |>
    hypothesize(
      null = "point",
      p = c("dem" = 0.8, "rep" = 0.1, "ind" = 0.1)
    ) |>
    generate(reps = 10, type = "draw")
  infer_way <- calculate(gen_gss9a, stat = "Chisq")
  # chisq.test way
  trad_way <- gen_gss9a |>
    dplyr::group_by(replicate) |>
    dplyr::do(broom::tidy(
      stats::chisq.test(table(.$partyid), p = c(0.8, 0.1, 0.1))
    )) |>
    dplyr::select(replicate, stat = statistic)
  expect_equal(infer_way, trad_way, ignore_attr = TRUE)

  # check that dots are passed correctly
  dat <- data.frame(
    action = c(
      rep(x = "promote", times = 32),
      rep(x = "hold file", times = 12),
      rep(x = "promote", times = 19),
      rep(x = "hold file", times = 30)
    ),
    sex = c(rep(x = "male", times = 44), rep(x = "female", times = 49))
  )

  promote_f <- dat |>
    specify(action ~ sex, success = "promote") |>
    calculate(stat = "Chisq", order = c("male", "female"), correct = FALSE)

  promote_t <- dat |>
    specify(action ~ sex, success = "promote") |>
    calculate(stat = "Chisq", order = c("male", "female"), correct = TRUE)

  expect_false(promote_f$stat == promote_t$stat)

  expect_snapshot(
    error = TRUE,
    dat |>
      specify(action ~ sex, success = "promote") |>
      calculate(stat = "Chisq", order = c("male", "female"), correct = "boop")
  )
})

test_that("chi-square works with factors with unused levels", {
  test_tbl <- tibble(
    x = factor(c("a", "b", "c"), levels = c("a", "b", "c", "d")),
    y = factor(c("e", "e", "f"))
  )

  # Unused levels in explanatory variable
  expect_snapshot(
    out <- test_tbl |>
      specify(y ~ x) |>
      calculate(stat = "Chisq") |>
      pull()
  )
  expect_true(!is.na(out))

  # Unused levels in response variable
  test_tbl[["x"]] <- factor(test_tbl[["x"]])
  levels(test_tbl[["y"]]) <- c("e", "f", "g")
  expect_snapshot(
    out <- test_tbl |>
      specify(y ~ x) |>
      calculate(stat = "Chisq") |>
      pull()
  )
  expect_true(!is.na(out))
})

test_that("`order` is working", {
  gen_gss_tbl10 <- gss_tbl |>
    specify(hours ~ college) |>
    hypothesize(null = "independence") |>
    generate(reps = 10, type = "permute")
  expect_snapshot(
    error = TRUE,
    calculate(gen_gss_tbl10, stat = "diff in means", order = c(TRUE, FALSE))
  )

  gen_gss_tbl11 <- gss_tbl |>
    specify(hours ~ college) |>
    generate(reps = 10, type = "bootstrap")
  expect_snapshot(
    error = TRUE,
    calculate(gen_gss_tbl11, stat = "diff in medians", order = "no degree")
  )
  expect_snapshot(
    error = TRUE,
    calculate(
      gen_gss_tbl11,
      stat = "diff in medians",
      order = c(NA, "no degree")
    )
  )
  expect_snapshot(
    error = TRUE,
    calculate(
      gen_gss_tbl11,
      stat = "diff in medians",
      order = c("no degree", "other")
    )
  )
  expect_silent(
    calculate(
      gen_gss_tbl11,
      stat = "diff in medians",
      order = c("no degree", "degree")
    )
  )
  expect_snapshot(
    error = TRUE,
    calculate(
      gen_gss_tbl11,
      stat = "diff in means",
      order = c("no degree", "degree", "the last one")
    )
  )
  # order not given
  expect_snapshot(
    res_ <- calculate(gen_gss_tbl11, stat = "diff in means")
  )
})

gen_gss_tbl12 <- gss_tbl |>
  specify(college ~ NULL, success = "no degree") |>
  hypothesize(null = "point", p = 0.3) |>
  generate(reps = 10, type = "draw")

test_that('success is working for stat = "prop"', {
  expect_silent(gen_gss_tbl12 |> calculate(stat = "prop"))
  expect_silent(gen_gss_tbl12 |> calculate(stat = "z"))
})

test_that("NULL response gives error", {
  gss_tbl_improp <- tibble::as_tibble(gss_tbl) |>
    dplyr::select(hours, age)

  expect_snapshot(error = TRUE, gss_tbl_improp |> calculate(stat = "mean"))
})

test_that("Permute F test works", {
  gen_gss_tbl13 <- gss_tbl |>
    specify(hours ~ partyid) |>
    hypothesize(null = "independence") |>
    generate(reps = 10, type = "permute")
  expect_silent(calculate(gen_gss_tbl13, stat = "F"))
})

test_that("Permute slope/correlation test works", {
  gen_gss_tbl14 <- gss_tbl |>
    specify(hours ~ age) |>
    hypothesize(null = "independence") |>
    generate(reps = 10, type = "permute")
  expect_silent(calculate(gen_gss_tbl14, stat = "slope"))
  expect_silent(calculate(gen_gss_tbl14, stat = "correlation"))
})

test_that("order being given when not needed gives warning", {
  gen_gss_tbl15 <- gss_tbl |>
    specify(college ~ partyid, success = "no degree") |>
    hypothesize(null = "independence") |>
    generate(reps = 10, type = "permute")
  expect_snapshot(
    res_ <- calculate(gen_gss_tbl15, stat = "Chisq", order = c("dem", "ind"))
  )
})

## Breaks oldrel build. Commented out for now.
# test_that("warning given if calculate without generate", {
#   expect_snapshot(
#     gss_tbl |>
#       specify(partyid ~ NULL) |>
#       hypothesize(
#         null = "point",
#         p = c("dem" = 0.4, "rep" = 0.4, "ind" = 0.2)
#       ) |>
#       # generate(reps = 10, type = "draw") |>
#       calculate(stat = "Chisq")
#   )
# })

test_that("specify() |> calculate() works", {
  expect_silent(
    gss_tbl |> specify(hours ~ NULL) |> calculate(stat = "mean")
  )
  expect_snapshot(
    res_ <- gss_tbl |>
      specify(hours ~ NULL) |>
      hypothesize(null = "point", mu = 4) |>
      calculate(stat = "mean")
  )

  expect_snapshot(
    res_ <- gss_tbl |> specify(partyid ~ NULL) |> calculate(stat = "Chisq")
  )
})

test_that("One sample t hypothesis test is working", {
  expect_snapshot(
    res_ <- gss_tbl |>
      specify(hours ~ NULL) |>
      hypothesize(null = "point", mu = 1) |>
      generate(reps = 10) |>
      calculate(stat = "t")
  )

  expect_snapshot(
    res_ <- gss_tbl |>
      specify(response = hours) |>
      calculate(stat = "t")
  )

  gss_tbl |>
    specify(response = hours) |>
    calculate(stat = "t", mu = 1)
})

test_that("specify done before calculate", {
  gss_tbl_mean <- gss_tbl |>
    dplyr::select(stat = hours)
  expect_snapshot(error = TRUE, calculate(gss_tbl_mean, stat = "mean"))

  gss_tbl_prop <- gss_tbl |> dplyr::select(college)
  attr(gss_tbl_prop, "response") <- "college"
  expect_snapshot(error = TRUE, calculate(gss_tbl_prop, stat = "prop"))
  expect_snapshot(error = TRUE, calculate(gss_tbl_prop, stat = "count"))
})

test_that("chisq GoF has params specified for observed stat", {
  no_params <- gss_tbl |> specify(response = partyid)
  expect_snapshot(res_ <- calculate(no_params, stat = "Chisq"))

  params <- gss_tbl |>
    specify(response = partyid) |>
    hypothesize(
      null = "point",
      p = c("dem" = .5, "rep" = .25, "ind" = .25)
    )
  expect_silent(calculate(params, stat = "Chisq"))
})

test_that("One sample t bootstrap is working", {
  expect_snapshot(
    res_ <- gss_tbl |>
      specify(hours ~ NULL) |>
      generate(reps = 10, type = "bootstrap") |>
      calculate(stat = "t")
  )
})

test_that("calculate doesn't depend on order of `p` (#122)", {
  calc_chisq <- function(p) {
    set.seed(111)

    gss_tbl |>
      specify(partyid ~ NULL) |>
      hypothesize(null = "point", p = p) |>
      generate(reps = 500, type = "draw") |>
      calculate("Chisq") |>
      get_p_value(obs_stat = 5, direction = "right")
  }

  expect_equal(
    calc_chisq(c("rep" = 0.25, "dem" = 0.5, "ind" = 0.25)),
    calc_chisq(c("ind" = 0.25, "rep" = 0.25, "dem" = 0.5)),
    tolerance = eps
  )
})

test_that("calc_impl_one_f works", {
  expect_true(is.function(calc_impl_one_f(mean)))
})

test_that("calc_impl_diff_f works", {
  expect_true(is.function(calc_impl_diff_f(mean)))
})

test_that("calc_impl.sum works", {
  .subset_1 <- function(x) x[[1]]
  expect_equal(
    gss_tbl |>
      specify(hours ~ NULL) |>
      calculate(stat = "sum") |>
      .subset_1(),
    sum(gss_tbl$hours),
    tolerance = eps
  )

  gen_gss_tbl16 <- gss_tbl |>
    specify(hours ~ NULL) |>
    generate(10)

  expect_equal(
    gen_gss_tbl16 |> calculate(stat = "sum"),
    gen_gss_tbl16 |> dplyr::summarise(stat = sum(hours)),
    ignore_attr = TRUE
  )
})

test_that("calc_impl_success_f works", {
  expect_true(
    is.function(calc_impl_success_f(
      f = function(response, success, ...) {
        mean(response == success, ...)
      },
      output_name = "proportion"
    ))
  )
})

test_that("calc_impl.count works", {
  .subset_1 <- function(x) x[[1]]
  expect_equal(
    gss_tbl |>
      specify(college ~ NULL, success = "no degree") |>
      calculate(stat = "count") |>
      .subset_1(),
    sum(gss_tbl$college == "no degree"),
    tolerance = eps
  )

  expect_equal(
    gen_gss_tbl12 |> calculate(stat = "count"),
    gen_gss_tbl12 |> dplyr::summarise(stat = sum(college == "no degree")),
    ignore_attr = TRUE
  )
})


gss_biased <- gss_tbl |>
  dplyr::filter(!(sex == "male" & college == "no degree" & age < 40))

gss_tbl <- table(gss_biased$sex, gss_biased$college)

test_that("calc_impl.odds_ratio works", {
  base_odds_ratio <- {
    (gss_tbl[1, 1] * gss_tbl[2, 2]) /
      (gss_tbl[1, 2] * gss_tbl[2, 1])
  }

  expect_equal(
    gss_biased |>
      specify(college ~ sex, success = "degree") |>
      calculate(stat = "odds ratio", order = c("female", "male")) |>
      dplyr::pull(),
    expected = base_odds_ratio,
    tolerance = eps
  )
})

test_that("calc_impl.ratio_of_props works", {
  base_ratio_of_props <- {
    (gss_tbl[1, 2] / sum(gss_tbl[1, ])) /
      (gss_tbl[2, 2] / sum(gss_tbl[2, ]))
  }

  expect_equal(
    gss_biased |>
      specify(college ~ sex, success = "degree") |>
      calculate(stat = "ratio of props", order = c("male", "female")) |>
      dplyr::pull(),
    expected = base_ratio_of_props,
    tolerance = eps
  )
})

test_that("calc_impl.ratio_of_means works", {
  base_ratio_of_means <- {
    mean(gss$age[gss$college == "degree"]) /
      mean(gss$age[gss$college == "no degree"])
  }

  expect_equal(
    gss |>
      specify(age ~ college) |>
      calculate("ratio of means", order = c("degree", "no degree")) |>
      dplyr::pull(),
    expected = base_ratio_of_means,
    tolerance = eps
  )
})

test_that("calc_impl.z works for one sample proportions", {
  infer_obs_stat <- gss |>
    specify(response = sex, success = "female") |>
    hypothesize(null = "point", p = .5) |>
    calculate(stat = "z") |>
    dplyr::pull()

  base_obs_stat <-
    (mean(gss$sex == "female") - .5) /
    sqrt(.5^2 / nrow(gss))

  expect_equal(infer_obs_stat, base_obs_stat, tolerance = eps)
})

test_that("calculate warns informatively with insufficient null", {
  expect_snapshot(
    res_ <- gss |>
      specify(response = sex, success = "female") |>
      calculate(stat = "z")
  )

  expect_snapshot(
    res_ <- gss |>
      specify(hours ~ NULL) |>
      calculate(stat = "t")
  )

  expect_snapshot(
    res_ <- gss |>
      specify(response = partyid) |>
      calculate(stat = "Chisq")
  )
})

test_that("calculate messages informatively with excessive null", {
  expect_snapshot(
    res_ <- gss |>
      specify(hours ~ NULL) |>
      hypothesize(null = "point", mu = 40) |>
      calculate(stat = "mean")
  )

  expect_snapshot(
    res_ <- gss |>
      specify(hours ~ NULL) |>
      hypothesize(null = "point", sigma = 10) |>
      calculate(stat = "sd")
  )

  expect_snapshot(
    res_ <- gss |>
      specify(hours ~ college) |>
      hypothesize(null = "independence") |>
      calculate("diff in means", order = c("no degree", "degree"))
  )
})

test_that("calculate can handle variables named x", {
  expect_silent({
    t_0 <- data.frame(x = 1:10) |>
      specify(response = x) |>
      hypothesise(null = "point", mu = 0) |>
      calculate(stat = "t")
  })

  expect_silent({
    t_1 <- data.frame(sample = 1:10) |>
      specify(response = sample) |>
      hypothesise(null = "point", mu = 0) |>
      calculate(stat = "t")
  })

  expect_equal(
    unname(t_0$stat),
    unname(t_1$stat),
    tolerance = .001
  )
})

test_that("calculate errors out with multiple explanatory variables", {
  expect_snapshot(
    error = TRUE,
    gss |>
      specify(hours ~ age + college) |>
      hypothesize(null = "independence") |>
      calculate(stat = "t")
  )

  expect_snapshot(
    error = TRUE,
    gss |>
      specify(hours ~ age + college) |>
      hypothesize(null = "independence") |>
      generate(reps = 3, type = "permute") |>
      calculate(stat = "t")
  )
})

test_that("reported standard errors are correct", {
  # mean ---------------------------------------------------------------------
  x_bar <- gss |>
    specify(response = hours) |>
    calculate(stat = "mean")

  expect_equal(
    attr(x_bar, "se"),
    stats::sd(gss$hours) / sqrt(nrow(gss)),
    tolerance = 1e-6
  )

  # prop ---------------------------------------------------------------------
  p_hat <- gss |>
    specify(response = sex, success = "female") |>
    calculate(stat = "prop")

  expect_equal(
    attr(p_hat, "se"),
    sqrt(
      (mean(gss$sex == "female") * (1 - mean(gss$sex == "female"))) / nrow(gss)
    ),
    tolerance = 1e-6
  )

  # diff in means ------------------------------------------------------------
  diff_bar <- gss |>
    specify(hours ~ college) |>
    calculate(stat = "diff in means", order = c("no degree", "degree"))

  expect_equal(
    attr(diff_bar, "se"),
    sqrt(
      (stats::sd(gss$hours[gss$college == "degree"]) /
        sqrt(nrow(gss[gss$college == "degree", ])))^2 +
        (stats::sd(gss$hours[gss$college == "no degree"]) /
          sqrt(nrow(gss[gss$college == "no degree", ])))^2
    ),
    tolerance = 1e-6
  )

  # diff in props ------------------------------------------------------------
  diff_hat <- gss |>
    specify(sex ~ college, success = "female") |>
    calculate(stat = "diff in props", order = c("no degree", "degree"))

  expect_equal(
    attr(diff_hat, "se"),
    sqrt(
      abs(
        (mean(gss[gss$college == "degree", ]$sex == "female") *
          (1 - mean(gss[gss$college == "degree", ]$sex == "female"))) /
          nrow(gss[gss$college == "degree", ])
      ) +
        abs(
          (mean(gss[gss$college == "no degree", ]$sex == "female") *
            (1 - mean(gss[gss$college == "no degree", ]$sex == "female"))) /
            nrow(gss[gss$college == "no degree", ])
        )
    ),
    tolerance = 1e-6
  )

  # ratio of means ------------------------------------------------------------
  # this stat shares machinery with others that report se, so make
  # sure that we don't
  rat_hat <- gss |>
    specify(hours ~ college) |>
    calculate(stat = "ratio of means", order = c("no degree", "degree"))

  expect_null(attr(rat_hat, "se"))
})

test_that("arbitrary test statistic works", {
  # observed test statistics match pre-implemented ones
  obs_stat_manual <-
    gss |>
    specify(response = hours) |>
    calculate(stat = function(x, ...) {mean(x$hours)})

  obs_stat_pre_implemented <-
    gss |>
    specify(response = hours) |>
    calculate(stat = function(x, ...) {mean(x$hours)})

  expect_equal(obs_stat_manual, obs_stat_pre_implemented)

  # can supply a stat totally unknown to infer
  mode_hours <- function(x, ...) {
    hours_tbl <- table(x$hours)
    as.numeric(names(sort(hours_tbl)))[length(hours_tbl)]
  }

  obs_stat_manual <-
    gss |>
    specify(response = hours) |>
    calculate(stat = mode_hours)

  expect_s3_class(obs_stat_manual, c("infer", "tbl_df"))
  expect_named(obs_stat_manual, "stat")
  expect_equal(obs_stat_manual$stat[[1]], 40)

  # ...even one with a character value!
  mode_partyid <- function(x, ...) {
    partyid_tbl <- table(x$partyid)
    names(sort(partyid_tbl))[length(partyid_tbl)]
  }

  obs_stat_manual <-
    gss |>
    specify(response = partyid) |>
    calculate(stat = mode_partyid)

  expect_s3_class(obs_stat_manual, c("infer", "tbl_df"))
  expect_named(obs_stat_manual, "stat")
  expect_equal(obs_stat_manual$stat[[1]], "ind")

  # resampled test statistics match pre-implemented ones
  set.seed(1)
  stat_dist_manual <-
    gss |>
    specify(response = hours) |>
    hypothesize(null = "point", mu = 40) |>
    generate(reps = 5, type = "bootstrap") |>
    calculate(stat = function(x, ...) {mean(x$hours)})

  set.seed(1)
  stat_dist_pre_implemented <-
    gss |>
    specify(response = hours) |>
    hypothesize(null = "point", mu = 40) |>
    generate(reps = 5, type = "bootstrap") |>
    calculate(stat = function(x, ...) {mean(x$hours)})

  expect_equal(stat_dist_manual, stat_dist_pre_implemented)

  # errors and warnings are rethrown informatively
  expect_snapshot(
    error = TRUE,
    gss |>
      specify(response = hours) |>
      # intentionally misspell `hour` to trigger warning
      calculate(stat = function(x, ...) {mean(x$hour)})
  )

  expect_snapshot(
    error = TRUE,
    gss |>
      specify(response = hours) |>
      # intentionally raise error
      calculate(stat = function(x, ...) {mean("hey there")})
  )

  # incompatible functions are handled gracefully
  expect_snapshot(
    error = TRUE,
    gss |>
      specify(response = hours) |>
      calculate(stat = function(x, ...) {data.frame(woops = mean(x$hours))})
  )

  expect_snapshot(
    error = TRUE,
    gss |>
      specify(response = hours) |>
      calculate(stat = function(x, ...) {identity})
  )
})


================================================
FILE: tests/testthat/test-fit.R
================================================
x1 <- gss[1:100, ] |> specify(response = hours)
x2 <- gss[1:100, ] |> specify(hours ~ NULL)
x3 <- gss[1:100, ] |> specify(response = hours, explanatory = c(age, college))
x4 <- gss[1:100, ] |> specify(hours ~ age + college)

test_that("get_formula helper works", {
  expect_false(has_attr(x1, "formula"))
  expect_true(has_attr(x2, "formula"))
  expect_false(has_attr(x3, "formula"))
  expect_true(has_attr(x4, "formula"))

  expect_equal(get_formula(x1), get_formula(x2), ignore_attr = TRUE)
  expect_equal(get_formula(x3), get_formula(x4), ignore_attr = TRUE)
})

test_that("fit_linear_model helper works", {
  x3_m <-
    fit_linear_model(
      x3,
      get_formula(x3)
    )

  x4_m <-
    fit_linear_model(
      x3,
      get_formula(x3)
    )

  expect_equal(x3_m, x4_m)
  expect_equal(nrow(x3_m), 3)
  expect_equal(ncol(x3_m), 2)

  expect_equal(
    c("term", "estimate"),
    colnames(x3_m)
  )

  expect_equal(
    c("character", "numeric"),
    purrr::map_chr(x3_m, class) |> unname()
  )

  expect_equal(
    c("intercept", "age", "collegedegree"),
    x3_m$term
  )
})

test_that("fit.infer can handle generated objects", {
  x3_fit <- x3 |> fit()

  x3_gen_fit <- x3 |>
    hypothesize(null = 'independence') |>
    generate(reps = 2, type = "permute") |>
    fit()

  expect_equal(unique(x3_fit$term), unique(x3_gen_fit$term))
  expect_equal(nrow(x3_fit) * 2, nrow(x3_gen_fit))
  expect_equal(ncol(x3_fit) + 1, ncol(x3_gen_fit))
  expect_equal(length(unique(x3_gen_fit$replicate)), 2)
  expect_equal(
    colnames(x3_fit),
    colnames(x3_gen_fit)[colnames(x3_gen_fit) != "replicate"]
  )
})

test_that("fit.infer messages informatively on excessive null", {
  expect_snapshot(
    res_ <- gss |>
      specify(hours ~ age + college) |>
      hypothesize(null = "independence") |>
      fit()
  )

  expect_silent(
    gss |>
      specify(hours ~ age + college) |>
      fit()
  )
})

test_that("fit.infer logistic regression works", {
  # linear regression default works
  expect_equal(
    gss |>
      specify(hours ~ age + college) |>
      fit(),
    gss |>
      specify(hours ~ age + college) |>
      fit(family = stats::gaussian)
  )

  # logistic regression default works
  expect_equal(
    gss |>
      specify(college ~ age + hours) |>
      fit(family = stats::binomial),
    gss |>
      specify(college ~ age + hours) |>
      fit()
  )

  # errors informatively with multinomial response variable
  expect_snapshot(
    error = TRUE,
    gss |>
      specify(finrela ~ age + college) |>
      fit()
  )

  # works as expected for `generate()`d objects
  fit_gen <- gss |>
    specify(college ~ age + hours) |>
    hypothesize(null = "independence") |>
    generate(type = "permute", reps = 2) |>
    fit()

  fit_obs <- gss |>
    specify(college ~ age + hours) |>
    fit()

  expect_equal(nrow(fit_gen), nrow(fit_obs) * 2)
  expect_equal(ncol(fit_gen), ncol(fit_obs) + 1)

  # responds to success argument
  fit_deg <- gss |>
    specify(college ~ age + hours, success = "degree") |>
    fit()

  fit_no_deg <- gss |>
    specify(college ~ age + hours, success = "no degree") |>
    fit()

  expect_equal(fit_deg$term, fit_no_deg$term)
  expect_equal(fit_deg$estimate, -fit_no_deg$estimate)
})


================================================
FILE: tests/testthat/test-generate.R
================================================
hyp_prop <- mtcars_df |>
  specify(response = am, success = "1") |>
  hypothesize(null = "point", p = .5)

hyp_diff_in_props <- mtcars_df |>
  specify(am ~ vs, success = "1") |>
  hypothesize(null = "independence")

hyp_chisq_gof <- mtcars_df |>
  specify(response = cyl) |>
  hypothesize(null = "point", p = c("4" = 1 / 3, "6" = 1 / 3, "8" = 1 / 3))

hyp_chisq_ind <- mtcars_df |>
  specify(cyl ~ vs) |>
  hypothesize(null = "independence")

hyp_mean <- mtcars_df |>
  specify(response = mpg) |>
  hypothesize(null = "point", mu = 3)

hyp_median <- mtcars_df |>
  specify(response = mpg) |>
  hypothesize(null = "point", med = 3)

hyp_sd <- mtcars_df |>
  specify(response = mpg) |>
  hypothesize(null = "point", sigma = 7)

hyp_diff_in_means <- mtcars_df |>
  specify(mpg ~ vs) |>
  hypothesize(null = "independence")

hyp_anova <- mtcars_df |>
  specify(mpg ~ cyl) |>
  hypothesize(null = "independence")

test_that("cohesion with type argument", {
  expect_snapshot(res_ <- generate(hyp_prop, type = "bootstrap"))
  expect_silent(generate(hyp_diff_in_props, type = "bootstrap"))
  expect_snapshot(res_ <- generate(hyp_chisq_gof, type = "bootstrap"))
  expect_silent(generate(hyp_chisq_ind, type = "bootstrap"))
  expect_silent(generate(hyp_mean, type = "bootstrap"))
  expect_silent(generate(hyp_median, type = "bootstrap"))
  expect_silent(generate(hyp_sd, type = "bootstrap"))
  expect_silent(generate(hyp_diff_in_means, type = "bootstrap"))
  expect_silent(generate(hyp_anova, type = "bootstrap"))

  expect_silent(generate(hyp_prop, type = "draw"))
  expect_snapshot(res_ <- generate(hyp_diff_in_props, type = "draw"))
  expect_silent(generate(hyp_chisq_gof, type = "draw"))
  expect_snapshot(res_ <- generate(hyp_chisq_ind, type = "draw"))
  expect_snapshot(error = TRUE, res_ <- generate(hyp_mean, type = "draw"))
  expect_snapshot(res_ <- generate(hyp_diff_in_means, type = "draw"))
  expect_snapshot(res_ <- generate(hyp_anova, type = "draw"))

  expect_snapshot(error = TRUE, res_ <- generate(hyp_prop, type = "permute"))
  expect_silent(generate(hyp_diff_in_props, type = "permute"))
  expect_snapshot(
    error = TRUE,
    res_ <- generate(hyp_chisq_gof, type = "permute")
  )
  expect_silent(generate(hyp_chisq_ind, type = "permute"))
  expect_snapshot(error = TRUE, res_ <- generate(hyp_mean, type = "permute"))
  expect_silent(generate(hyp_diff_in_means, type = "permute"))
  expect_silent(generate(hyp_anova, type = "permute"))
})

test_that("sensible output", {
  expect_equal(
    nrow(mtcars_df) * 500,
    nrow(generate(hyp_prop, reps = 500, type = "draw"))
  )
  expect_silent(generate(hyp_mean, reps = 1, type = "bootstrap"))
  expect_snapshot(error = TRUE, generate(hyp_mean, reps = 1, type = "other"))
  expect_equal(class(generate(hyp_mean, type = "bootstrap"))[1], "infer")
})

test_that("auto `type` works (generate)", {
  one_mean <- mtcars_df |>
    specify(response = mpg) |> # formula alt: mpg ~ NULL
    hypothesize(null = "point", mu = 25) |>
    generate(reps = 100)

  one_nonshift_mean <- mtcars_df |>
    specify(response = mpg) |>
    generate(reps = 100)

  one_median <- mtcars_df |>
    specify(response = mpg) |> # formula alt: mpg ~ NULL
    hypothesize(null = "point", med = 26) |>
    generate(reps = 100)

  one_prop <- mtcars_df |>
    specify(response = am, success = "1") |> # formula alt: am ~ NULL
    hypothesize(null = "point", p = .25) |>
    generate(reps = 100)

  two_props <- mtcars_df |>
    specify(am ~ vs, success = "1") |> # alt: response = am, explanatory = vs
    hypothesize(null = "independence") |>
    generate(reps = 100)

  gof_chisq <- mtcars_df |>
    specify(cyl ~ NULL) |> # alt: response = cyl
    hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) |>
    generate(reps = 100)

  indep_chisq <- mtcars_df |>
    specify(cyl ~ am) |> # alt: response = cyl, explanatory = am
    hypothesize(null = "independence") |>
    generate(reps = 100)

  two_means <- mtcars_df |>
    specify(mpg ~ am) |> # alt: response = mpg, explanatory = am
    hypothesize(null = "independence") |>
    generate(reps = 100)

  anova_f <- mtcars_df |>
    specify(mpg ~ cyl) |> # alt: response = mpg, explanatory = cyl
    hypothesize(null = "independence") |>
    generate(reps = 100)

  slopes <- mtcars_df |>
    specify(mpg ~ hp) |> # alt: response = mpg, explanatory = cyl
    hypothesize(null = "independence") |>
    generate(reps = 100)

  one_nonshift_prop <- mtcars_df |>
    specify(response = am, success = "1") |>
    generate(reps = 100)

  two_means_boot <- mtcars_df |>
    specify(mpg ~ am) |>
    generate(reps = 100)

  two_props_boot <- mtcars_df |>
    specify(am ~ vs, success = "1") |>
    generate(reps = 100)

  slope_boot <- mtcars_df |>
    specify(mpg ~ hp) |>
    generate(reps = 100)

  expect_equal(attr(one_mean, "type"), "bootstrap")
  expect_equal(attr(one_nonshift_mean, "type"), "bootstrap")
  expect_equal(attr(one_median, "type"), "bootstrap")
  expect_equal(attr(one_prop, "type"), "draw")
  expect_equal(attr(two_props, "type"), "permute")
  expect_equal(attr(gof_chisq, "type"), "draw")
  expect_equal(attr(indep_chisq, "type"), "permute")
  expect_equal(attr(two_means, "type"), "permute")
  expect_equal(attr(anova_f, "type"), "permute")
  expect_equal(attr(slopes, "type"), "permute")
  expect_equal(attr(one_nonshift_prop, "type"), "bootstrap")
  expect_equal(attr(two_means_boot, "type"), "bootstrap")
  expect_equal(attr(two_props_boot, "type"), "bootstrap")
  expect_equal(attr(slope_boot, "type"), "bootstrap")

  expect_snapshot(
    error = TRUE,
    mtcars_df |>
      specify(response = mpg) |> # formula alt: mpg ~ NULL
      hypothesize(null = "point", mu = 25) |>
      generate(reps = 100, type = "permute")
  )

  expect_snapshot(
    res_ <- mtcars_df |>
      specify(response = mpg) |>
      generate(reps = 100, type = "draw")
  )

  expect_snapshot(
    error = TRUE,
    res_ <- mtcars_df |>
      specify(response = mpg) |> # formula alt: mpg ~ NULL
      hypothesize(null = "point", med = 26) |>
      generate(reps = 100, type = "permute")
  )

  expect_snapshot(
    res_ <- mtcars_df |>
      specify(response = am, success = "1") |> # formula alt: am ~ NULL
      hypothesize(null = "point", p = .25) |>
      generate(reps = 100, type = "bootstrap")
  )

  expect_silent(
    mtcars_df |>
      specify(am ~ vs, success = "1") |> # alt: response = am, explanatory = vs
      hypothesize(null = "independence") |>
      generate(reps = 100, type = "bootstrap")
  )

  expect_snapshot(
    res_ <- mtcars_df |>
      specify(cyl ~ NULL) |> # alt: response = cyl
      hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) |>
      generate(reps = 100, type = "bootstrap")
  )

  expect_snapshot(
    res_ <- mtcars_df |>
      specify(cyl ~ am) |> # alt: response = cyl, explanatory = am
      hypothesize(null = "independence") |>
      generate(reps = 100, type = "draw")
  )

  expect_silent(
    mtcars_df |>
      specify(mpg ~ am) |> # alt: response = mpg, explanatory = am
      hypothesize(null = "independence") |>
      generate(reps = 100, type = "bootstrap")
  )

  expect_silent(
    mtcars_df |>
      specify(mpg ~ am) |> # alt: response = mpg, explanatory = am
      generate(reps = 100, type = "bootstrap")
  )

  expect_snapshot(
    res_ <- mtcars_df |>
      specify(mpg ~ cyl) |> # alt: response = mpg, explanatory = cyl
      hypothesize(null = "independence") |>
      generate(reps = 100, type = "draw")
  )

  expect_silent(
    mtcars_df |>
      specify(mpg ~ hp) |> # alt: response = mpg, explanatory = cyl
      hypothesize(null = "independence") |>
      generate(reps = 100, type = "bootstrap")
  )

  expect_snapshot(
    res_ <- mtcars_df |>
      specify(response = am, success = "1") |>
      generate(reps = 100, type = "draw")
  )

  expect_snapshot(
    error = TRUE,
    res_ <- mtcars_df |>
      specify(mpg ~ am) |>
      generate(reps = 100, type = "permute")
  )

  expect_snapshot(
    res_ <- mtcars_df |>
      specify(am ~ vs, success = "1") |>
      generate(reps = 100, type = "draw")
  )

  expect_snapshot(
    res_ <- mtcars_df |>
      specify(mpg ~ hp) |>
      generate(reps = 100, type = "draw")
  )
})

test_that("mismatches lead to error", {
  expect_snapshot(
    error = TRUE,
    res_ <- mtcars_df |> generate(reps = 10, type = "permute")
  )
  expect_snapshot(
    error = TRUE,
    res_ <- mtcars_df |>
      specify(am ~ NULL, success = "1") |>
      hypothesize(null = "independence", p = c("1" = 0.5)) |>
      generate(reps = 100, type = "draw")
  )
  expect_snapshot(
    res_ <- mtcars_df |>
      specify(cyl ~ NULL) |> # alt: response = cyl
      hypothesize(
        null = "point",
        p = c("4" = .5, "6" = .25, "8" = .25)
      ) |>
      generate(reps = 100, type = "bootstrap")
  )
  expect_snapshot(
    error = TRUE,
    res_ <- mtcars_df |>
      specify(mpg ~ hp) |>
      generate(reps = 100, type = "other")
  )
})

test_that("generate() handles `NULL` value of `type`", {
  withr::local_envvar(SUPPRESS_INFER_MESSAGES = "false")

  expect_snapshot(
    res_ <- generate(hyp_prop, type = NULL)
  )
})

test_that("generate() handles `x` response", {
  expect_named(
    data.frame(x = factor(rbinom(100, size = 1, prob = .5))) |>
      specify(response = x, success = "1") |>
      hypothesize(null = "point", p = .5) |>
      generate(reps = 100, type = "draw"),
    c("x", "replicate")
  )

  expect_named(
    data.frame(category = c(rep(c("A", "B"), each = 5)), x = 1:10) |>
      specify(explanatory = category, response = x) |>
      hypothesize(null = "independence") |>
      generate(reps = 5, type = "permute"),
    c("x", "category", "replicate")
  )
})

test_that("generate() can permute with multiple explanatory variables", {
  # if the y variable is the one being permuted and the x's
  # are being left alone, then each age + college combination
  # should exist in every replicate
  equals_3 <- function(x) {
    x == 3
  }
  expect_true(
    gss |>
      # add random noise to make the variable truly continuous
      dplyr::mutate(age = age + rnorm(nrow(gss))) |>
      specify(hours ~ age + college) |>
      hypothesize(null = "independence") |>
      generate(reps = 3, type = "permute") |>
      dplyr::ungroup() |>
      dplyr::count(age, college) |>
      dplyr::pull(n) |>
      equals_3() |>
      all()
  )

  x <- gss |>
    specify(hours ~ age + college) |>
    hypothesize(null = "independence") |>
    generate(reps = 3, type = "permute")

  expect_true(inherits(x, "infer"))
  expect_true(inherits(explanatory_variable(x), "tbl_df"))
  expect_true(inherits(explanatory_name(x), "character"))
  expect_true(inherits(explanatory_expr(x), "call"))

  expect_equal(explanatory_name(x), c("age", "college"))
  expect_equal(response_name(x), "hours")

  expect_equal(nrow(x), 1500)
  expect_equal(ncol(x), 4)
})

test_that("generate is sensitive to the variables argument", {
  # default argument works appropriately
  expect_equal(
    {
      set.seed(1)

      gss[1:10, ] |>
        specify(hours ~ age + college) |>
        hypothesize(null = "independence") |>
        generate(reps = 2, type = "permute")
    },
    {
      set.seed(1)

      gss[1:10, ] |>
        specify(hours ~ age + college) |>
        hypothesize(null = "independence") |>
        generate(reps = 2, type = "permute", variables = hours)
    }
  )

  # permuting changes output
  expect_silent(
    perm_age <- gss[1:10, ] |>
      specify(hours ~ age + college) |>
      hypothesize(null = "independence") |>
      generate(reps = 2, type = "permute", variables = age)
  )

  expect_false(all(perm_age$age[1:10] == perm_age$age[11:20]))
  expect_true(all(perm_age$hours[1:10] == perm_age$hours[11:20]))
  expect_true(all(perm_age$college[1:10] == perm_age$college[11:20]))

  expect_silent(
    perm_college <- gss[1:10, ] |>
      specify(hours ~ age + college) |>
      hypothesize(null = "independence") |>
      generate(reps = 2, type = "permute", variables = college)
  )

  expect_true(all(perm_college$age[1:10] == perm_college$age[11:20]))
  expect_true(all(perm_college$hours[1:10] == perm_college$hours[11:20]))
  expect_false(all(perm_college$college[1:10] == perm_college$college[11:20]))

  expect_silent(
    perm_college_age <- gss[1:10, ] |>
      specify(hours ~ age + college) |>
      hypothesize(null = "independence") |>
      generate(reps = 2, type = "permute", variables = c(college, age))
  )

  expect_false(all(perm_college_age$age[1:10] == perm_college_age$age[11:20]))
  expect_true(all(
    perm_college_age$hours[1:10] == perm_college_age$hours[11:20]
  ))
  expect_false(all(
    perm_college_age$college[1:10] == perm_college_age$college[11:20]
  ))

  # interaction effects are ignored
  expect_equal(
    {
      set.seed(1)

      expect_message(
        res_1 <- gss[1:10, ] |>
          specify(hours ~ age + college) |>
          hypothesize(null = "independence") |>
          generate(
            reps = 2,
            type = "permute",
            variables = c(hours, age * college)
          )
      )

      res_1
    },
    {
      set.seed(1)

      gss[1:10, ] |>
        specify(hours ~ age + college) |>
        hypothesize(null = "independence") |>
        generate(reps = 2, type = "permute", variables = hours)
    }
  )
})

test_that("variables argument prompts when it ought to", {
  expect_snapshot(
    error = TRUE,
    res_ <- gss[1:10, ] |>
      specify(hours ~ age + college) |>
      hypothesize(null = "independence") |>
      generate(reps = 2, type = "permute", variables = c(howdy))
  )

  expect_snapshot(
    error = TRUE,
    res <- gss[1:10, ] |>
      specify(hours ~ age + college) |>
      hypothesize(null = "independence") |>
      generate(reps = 2, type = "permute", variables = c(howdy, doo))
  )

  expect_snapshot(
    res_ <- gss[1:10, ] |>
      specify(hours ~ NULL) |>
      hypothesize(null = "point", mu = 40) |>
      generate(reps = 2, type = "bootstrap", variables = c(hours))
  )

  expect_snapshot(
    error = TRUE,
    res_ <- gss[1:10, ] |>
      specify(hours ~ age + college) |>
      hypothesize(null = "independence") |>
      generate(reps = 2, type = "permute", variables = "hours")
  )

  expect_snapshot(
    res_ <- gss[1:10, ] |>
      specify(hours ~ age + college + age * college) |>
      hypothesize(null = "independence") |>
      generate(reps = 2, type = "permute", variables = age * college)
  )

  expect_snapshot(
    res_ <- gss[1:10, ] |>
      specify(hours ~ age + college + age * college) |>
      hypothesize(null = "independence") |>
      generate(reps = 2, type = "permute", variables = c(hours, age * college))
  )

  expect_silent(
    gss[1:10, ] |>
      specify(hours ~ age + college + age * college) |>
      hypothesize(null = "independence") |>
      generate(reps = 2, type = "permute", variables = c(hours))
  )

  expect_silent(
    gss[1:10, ] |>
      specify(hours ~ age + college + age * college) |>
      hypothesize(null = "independence") |>
      generate(reps = 2, type = "permute")
  )

  expect_silent(
    gss[1:10, ] |>
      specify(hours ~ age + college) |>
      hypothesize(null = "independence") |>
      generate(reps = 2, type = "permute")
  )

  # warn on type != permute but don't raise message re: interaction
  # effects unless otherwise used appropriately
  expect_snapshot(
    res_ <- gss[1:10, ] |>
      specify(hours ~ age * college) |>
      generate(
        reps = 2,
        type = "bootstrap",
        variables = c(hours, age * college)
      )
  )
})

test_that("type = 'draw'/'simulate' superseding handled gracefully", {
  # message on type = 'simulate'
  expect_snapshot(
    res_ <- mtcars_df |>
      specify(response = am, success = "1") |>
      hypothesize(null = "point", p = .5) |>
      generate(type = "simulate")
  )

  # don't message on type = 'draw'
  expect_silent(
    mtcars_df |>
      specify(response = am, success = "1") |>
      hypothesize(null = "point", p = .5) |>
      generate(type = "draw")
  )

  # mention new generation types when supplied a bad one
  expect_snapshot(
    error = TRUE,
    res_ <- mtcars_df |>
      specify(response = am, success = "1") |>
      hypothesize(null = "point", p = .5) |>
      generate(type = "boop")
  )

  # warns with either alias when given unexpected generate type
  expect_snapshot(
    error = TRUE,
    mtcars_df |>
      specify(response = mpg) |>
      hypothesize(null = "point", mu = 20) |>
      generate(type = "draw")
  )

  expect_snapshot(
    error = TRUE,
    mtcars_df |>
      specify(response = mpg) |>
      hypothesize(null = "point", mu = 20) |>
      generate(type = "draw")
  )

  expect_equal(
    {
      set.seed(1)

      expect_message(
        res_1 <- mtcars_df |>
          specify(response = am, success = "1") |>
          hypothesize(null = "point", p = .5) |>
          generate(type = "simulate")
      )

      res_1
    },
    {
      set.seed(1)

      res_2 <- mtcars_df |>
        specify(response = am, success = "1") |>
        hypothesize(null = "point", p = .5) |>
        generate(type = "draw")

      res_2
    },
    ignore_attr = TRUE
  )
})

test_that("has_p_param handles edge cases", {
  x <- NA

  set_p_names <- function(x, to) {
    attr(x, "params") <- rep(NA, length(to))
    names(attr(x, "params")) <- to
    x
  }

  expect_true(has_p_param(set_p_names(x, c("p.boop"))))
  expect_true(has_p_param(set_p_names(x, c("p.boop", "p.bop"))))
  expect_false(has_p_param(set_p_names(x, c("p.boop", "pbop"))))
  expect_false(has_p_param(set_p_names(x, c("p.boop", "bo.p"))))
  expect_false(has_p_param(set_p_names(x, c("p.boop", "pbop"))))
  expect_false(has_p_param(set_p_names(x, c(".p.boop"))))
  expect_false(has_p_param(set_p_names(x, c("beep.boop"))))
})


================================================
FILE: tests/testthat/test-get_confidence_interval.R
================================================
set.seed(2018)
test_df <- gss_calc[1:20, ]
test_df$stat <- c(
  -5,
  -4,
  -4,
  -4,
  -1,
  -0.5,
  rep(0, 6),
  1,
  1,
  3.999,
  4,
  4,
  4.001,
  5,
  5
)
point <- mean(test_df[["stat"]])

perc_def_out <- tibble::tibble(
  lower_ci = unname(quantile(test_df[["stat"]], 0.025)),
  upper_ci = unname(quantile(test_df[["stat"]], 0.975))
)

test_that("get_confidence_interval works with defaults", {
  expect_equal(test_df |> get_confidence_interval(), perc_def_out)
})

test_that("get_confidence_interval works with `type = 'percentile'`", {
  expect_equal(
    test_df |> get_confidence_interval(type = "percentile"),
    perc_def_out
  )

  expect_equal(
    test_df |> get_confidence_interval(level = 0.5, type = "percentile"),
    tibble::tibble(
      lower_ci = unname(quantile(test_df[["stat"]], 0.25)),
      upper_ci = unname(quantile(test_df[["stat"]], 0.75))
    )
  )
})

test_that("get_confidence_interval works with `type = 'se'`", {
  expect_equal(
    test_df |>
      get_confidence_interval(type = "se", point_estimate = point),
    tibble::tibble(lower_ci = -5.653, upper_ci = 6.603),
    tolerance = 1e-3,
    ignore_attr = TRUE
  )

  # use equivalent rather than equal as ci has attributes for se and point est
  expect_equal(
    test_df |>
      get_confidence_interval(level = 0.5, type = "se", point_estimate = point),
    tibble::tibble(lower_ci = -1.633, upper_ci = 2.583),
    tolerance = 1e-3,
    ignore_attr = TRUE
  )
})

test_that("get_confidence_interval works with `type = 'bias-corrected'`", {
  expect_equal(
    test_df |>
      get_confidence_interval(
        type = "bias-corrected",
        point_estimate = point
      ),
    tibble::tibble(lower_ci = -4.00, upper_ci = 5),
    tolerance = 1e-3
  )

  expect_equal(
    test_df |>
      get_confidence_interval(
        level = 0.5,
        type = "bias-corrected",
        point_estimate = point
      ),
    tibble::tibble(lower_ci = 0, upper_ci = 4.0007),
    tolerance = 1e-3
  )
})

test_that("get_confidence_interval supports data frame `point_estimate`", {
  point_df <- data.frame(p = point)

  expect_equal(
    test_df |> get_confidence_interval(type = "se", point_estimate = point),
    test_df |> get_confidence_interval(type = "se", point_estimate = point_df),
    tolerance = eps
  )
  expect_equal(
    test_df |>
      get_confidence_interval(type = "bias-corrected", point_estimate = point),
    test_df |>
      get_confidence_interval(
        type = "bias-corrected",
        point_estimate = point_df
      ),
    tolerance = eps
  )
})

test_that("get_confidence_interval messages with no explicit `level`", {
  withr::local_envvar(SUPPRESS_INFER_MESSAGES = "false")
  expect_snapshot(res_ <- get_confidence_interval(test_df))
  expect_silent(get_confidence_interval(test_df, level = 0.95))
  expect_silent(get_confidence_interval(test_df, 0.95))
})

test_that("get_confidence_interval checks input", {
  expect_snapshot(
    error = TRUE,
    test_df |> get_confidence_interval(type = "other")
  )
  expect_snapshot(
    error = TRUE,
    test_df |> get_confidence_interval(level = 1.2)
  )

  expect_snapshot(
    error = TRUE,
    test_df |> get_confidence_interval(point_estimate = "a")
  )
  expect_snapshot(
    error = TRUE,
    test_df |> get_confidence_interval(type = "se", point_estimate = "a")
  )
  expect_snapshot(
    error = TRUE,
    test_df |>
      get_confidence_interval(
        type = "se",
        point_estimate = data.frame(p = "a")
      )
  )

  expect_snapshot(
    error = TRUE,
    test_df |> get_confidence_interval(type = "se")
  )
  expect_snapshot(
    error = TRUE,
    test_df |> get_confidence_interval(type = "bias-corrected")
  )
})


test_that("get_confidence_interval can handle fitted objects", {
  # generate example objects
  set.seed(1)

  null_fits <- gss[1:50, ] |>
    specify(hours ~ age + college) |>
    hypothesize(null = "independence") |>
    generate(reps = 10, type = "permute") |>
    fit()

  obs_fit <- gss[1:50, ] |>
    specify(hours ~ age + college) |>
    fit()

  # check each ci type
  expect_equal(
    get_confidence_interval(null_fits, point_estimate = obs_fit, level = .95),
    structure(
      list(
        term = c("age", "collegedegree", "intercept"),
        lower_ci = c(-0.2139, -6.6020, 36.4537),
        upper_ci = c(0.1064, 8.7479, 50.8005)
      ),
      row.names = c(NA, -3L),
      class = c("tbl_df", "tbl", "data.frame")
    ),
    tolerance = 1e-3,
    ignore_attr = TRUE
  )

  expect_equal(
    get_confidence_interval(
      null_fits,
      point_estimate = obs_fit,
      level = .95,
      type = "se"
    ),
    structure(
      list(
        term = c("age", "collegedegree", "intercept"),
        lower_ci = c(-0.3809, -13.6182, 36.8694),
        upper_ci = c(0.1124, 6.1680, 59.1752)
      ),
      row.names = c(NA, -3L),
      class = c("tbl_df", "tbl", "data.frame")
    ),
    tolerance = 1e-3,
    ignore_attr = TRUE
  )

  expect_equal(
    get_confidence_interval(
      null_fits,
      point_estimate = obs_fit,
      level = .95,
      type = "bias-corrected"
    ),
    structure(
      list(
        term = c("age", "collegedegree", "intercept"),
        lower_ci = c(-0.2177, -7.1506, 37.2941),
        upper_ci = c(0.0806, 1.9707, 51.0512)
      ),
      row.names = c(NA, -3L),
      class = c("tbl_df", "tbl", "data.frame")
    ),
    tolerance = 1e-3,
    ignore_attr = TRUE
  )

  # errors out when it ought to
  obs_fit_2 <- gss[1:50, ] |>
    specify(hours ~ age) |>
    fit()

  expect_snapshot(
    error = TRUE,
    get_confidence_interval(null_fits, point_estimate = obs_fit_2, level = .95)
  )

  obs_fit_3 <-
    obs_fit_2 <- gss[1:50, ] |>
      specify(year ~ age + college) |>
      fit()

  expect_snapshot(
    error = TRUE,
    get_confidence_interval(null_fits, point_estimate = obs_fit_3, level = .95)
  )
})

test_that("get_confidence_interval can handle bad args with fitted objects", {
  set.seed(1)

  null_fits <- gss[1:50, ] |>
    specify(hours ~ age + college) |>
    hypothesize(null = "independence") |>
    generate(reps = 10, type = "permute") |>
    fit()

  obs_fit <- gss[1:50, ] |>
    specify(hours ~ age + college) |>
    fit()

  expect_snapshot(
    error = TRUE,
    get_confidence_interval(null_fits, point_estimate = "boop", level = .95)
  )

  expect_snapshot(
    error = TRUE,
    get_confidence_interval(
      null_fits,
      point_estimate = obs_fit$estimate,
      level = .95
    )
  )

  expect_snapshot(
    error = TRUE,
    get_confidence_interval(obs_fit, point_estimate = null_fits, level = .95)
  )
})

test_that("theoretical CIs align with simulation-based (mean)", {
  x_bar <- gss |>
    specify(response = hours) |>
    calculate(stat = "mean")

  set.seed(1)

  null_dist <- gss |>
    specify(response = hours) |>
    hypothesize(null = "point", mu = 40) |>
    generate(reps = 1e3, type = "bootstrap") |>
    calculate(stat = "mean")

  null_dist_theory <- gss |>
    specify(response = hours) |>
    hypothesize(null = "point", mu = 40) |>
    assume(distribution = "t")

  expect_equal(
    get_confidence_interval(
      null_dist,
      .95,
      type = "se",
      point_estimate = x_bar
    ),
    get_confidence_interval(
      null_dist_theory,
      .95,
      type = "se",
      point_estimate = x_bar
    ),
    tolerance = .2
  )
})

test_that("theoretical CIs align with simulation-based (prop)", {
  p_hat <- gss |>
    specify(response = sex, success = "female") |>
    calculate(stat = "prop")

  set.seed(1)

  null_dist <- gss |>
    specify(response = sex, success = "female") |>
    hypothesize(null = "point", p = .5) |>
    generate(reps = 1e3, type = "draw") |>
    calculate(stat = "prop")

  null_dist_theory <- gss |>
    specify(response = sex, success = "female") |>
    assume(distribution = "z")

  expect_equal(
    get_confidence_interval(
      null_dist,
      .95,
      type = "se",
      point_estimate = p_hat
    ),
    get_confidence_interval(
      null_dist_theory,
      .95,
      type = "se",
      point_estimate = p_hat
    ),
    tolerance = .05
  )
})

test_that("theoretical CIs align with simulation-based (diff in means)", {
  diff_bar <- gss |>
    specify(age ~ college) |>
    calculate(stat = "diff in means", order = c("degree", "no degree"))

  set.seed(1)

  null_dist <- gss |>
    specify(age ~ college) |>
    hypothesize(null = "independence") |>
    generate(reps = 3e3, type = "permute") |>
    calculate(stat = "diff in means", order = c("degree", "no degree"))

  null_dist_theory <- gss |>
    specify(age ~ college) |>
    assume(distribution = "t")

  expect_equal(
    get_confidence_interval(
      null_dist,
      .95,
      type = "se",
      point_estimate = diff_bar
    ),
    get_confidence_interval(
      null_dist_theory,
      .95,
      type = "se",
      point_estimate = diff_bar
    ),
    tolerance = .15
  )
})

test_that("theoretical CIs align with simulation-based (diff in props)", {
  diff_hat <- gss |>
    specify(college ~ sex, success = "no degree") |>
    calculate(stat = "diff in props", order = c("female", "male"))

  set.seed(1)

  null_dist <- gss |>
    specify(college ~ sex, success = "no degree") |>
    hypothesize(null = "independence") |>
    generate(reps = 1e3, type = "permute") |>
    calculate(stat = "diff in props", order = c("female", "male"))

  null_dist_theory <- gss |>
    specify(college ~ sex, success = "no degree") |>
    assume(distribution = "z")

  expect_equal(
    get_confidence_interval(
      null_dist,
      .95,
      type = "se",
      point_estimate = diff_hat
    ),
    get_confidence_interval(
      null_dist_theory,
      .95,
      type = "se",
      point_estimate = diff_hat
    ),
    tolerance = .001
  )
})

test_that("theoretical CIs check arguments properly", {
  x_bar <- gss |>
    specify(response = hours) |>
    calculate(stat = "mean")

  null_dist_theory <- gss |>
    specify(age ~ college) |>
    assume(distribution = "t")

  # check that type is handled correctly
  expect_equal(
    get_confidence_interval(
      null_dist_theory,
      level = .95,
      point_estimate = x_bar
    ),
    get_confidence_interval(
      null_dist_theory,
      level = .95,
      type = "se",
      point_estimate = x_bar
    )
  )

  expect_snapshot(
    error = TRUE,
    get_confidence_interval(
      null_dist_theory,
      level = .95,
      type = "percentile",
      point_estimate = x_bar
    )
  )

  expect_snapshot(
    error = TRUE,
    get_confidence_interval(
      null_dist_theory,
      level = .95,
      type = "boop",
      point_estimate = x_bar
    )
  )

  # check that point estimate hasn't been post-processed
  expect_snapshot(
    error = TRUE,
    get_confidence_interval(
      null_dist_theory,
      level = .95,
      point_estimate = dplyr::pull(x_bar)
    )
  )

  expect_snapshot(
    error = TRUE,
    get_confidence_interval(
      null_dist_theory,
      level = .95,
      point_estimate = x_bar$stat
    )
  )

  # check that statistics are implemented
  obs_t <- gss |>
    specify(response = hours) |>
    hypothesize(null = "point", mu = 40) |>
    calculate(stat = "t")

  expect_snapshot(
    error = TRUE,
    get_confidence_interval(
      null_dist_theory,
      level = .95,
      point_estimate = obs_t
    )
  )

  # check that stat and distribution align
  p_hat <- gss |>
    specify(response = sex, success = "female") |>
    calculate(stat = "prop")

  null_dist_z <- gss |>
    specify(response = sex, success = "female") |>
    assume(distribution = "z")

  expect_snapshot(
    error = TRUE,
    get_confidence_interval(
      null_dist_theory,
      level = .95,
      point_estimate = p_hat
    )
  )

  expect_snapshot(
    error = TRUE,
    get_confidence_interval(
      null_dist_z,
      level = .95,
      point_estimate = x_bar
    )
  )
})

test_that("handles missing values gracefully (#520)", {
  data <- data.frame(
    prop = seq(0, 1, length.out = 10),
    group = rep(c("a", "b"), each = 5L)
  )

  set.seed(1)
  boot_dist <-
    data |>
    specify(prop ~ group) |>
    hypothesize(null = "independence") |>
    generate(reps = 1000, type = "bootstrap") |>
    calculate(stat = "diff in medians", order = c("b", "a"))

  expect_snapshot(res <- get_confidence_interval(boot_dist, .95))

  expect_s3_class(res, "data.frame")
})


================================================
FILE: tests/testthat/test-get_p_value.R
================================================
set.seed(2018)
test_df <- gss_calc[1:20, ]
test_df$stat <- sample(c(
  -5,
  -4,
  -4,
  -4,
  -1,
  -0.5,
  rep(0, 6),
  1,
  1,
  3.999,
  4,
  4,
  4.001,
  5,
  5
))

test_that("direction is appropriate", {
  expect_snapshot(
    error = TRUE,
    test_df |> get_p_value(obs_stat = 0.5, direction = "righ")
  )
})

test_that("get_p_value works", {
  expect_equal(
    get_p_value(test_df, 4, "right")[[1]][1],
    5 / 20,
    tolerance = eps
  )
  expect_equal(
    get_p_value(test_df, 4, "left")[[1]][1],
    17 / 20,
    tolerance = eps
  )
  expect_equal(
    get_p_value(test_df, 4, "both")[[1]][1],
    10 / 20,
    tolerance = eps
  )

  expect_equal(
    get_p_value(test_df, 0, "right")[[1]][1],
    14 / 20,
    tolerance = eps
  )
  expect_equal(
    get_p_value(test_df, 0, "left")[[1]][1],
    12 / 20,
    tolerance = eps
  )
  # This is also a check for not returning value more than 1
  expect_equal(get_p_value(test_df, 0, "both")[[1]][1], 1, tolerance = eps)

  expect_equal(
    get_p_value(test_df, -3.999, "right")[[1]][1],
    16 / 20,
    tolerance = eps
  )
  expect_equal(
    get_p_value(test_df, -3.999, "left")[[1]][1],
    4 / 20,
    tolerance = eps
  )
  expect_equal(
    get_p_value(test_df, -3.999, "both")[[1]][1],
    8 / 20,
    tolerance = eps
  )

  expect_equal(
    get_p_value(test_df, 4, "greater"),
    get_p_value(test_df, 4, "right"),
    tolerance = eps
  )
  expect_equal(
    get_p_value(test_df, 4, "less"),
    get_p_value(test_df, 4, "left"),
    tolerance = eps
  )
  expect_equal(
    get_p_value(test_df, 4, "two_sided"),
    get_p_value(test_df, 4, "both"),
    tolerance = eps
  )
  expect_equal(
    get_p_value(test_df, 4, "two-sided"),
    get_p_value(test_df, 4, "both"),
    tolerance = eps
  )
  expect_equal(
    get_p_value(test_df, 4, "two sided"),
    get_p_value(test_df, 4, "both"),
    tolerance = eps
  )
  expect_equal(
    get_p_value(test_df, 4, "two.sided"),
    get_p_value(test_df, 4, "both"),
    tolerance = eps
  )
})

test_that("theoretical p-value not supported error", {
  obs_F <- gss_tbl |>
    specify(hours ~ partyid) |>
    calculate(stat = "F")
  expect_snapshot(
    error = TRUE,
    gss_tbl |>
      specify(hours ~ partyid) |>
      hypothesize(null = "independence") |>
      calculate(stat = "F") |>
      get_p_value(obs_stat = obs_F, direction = "right")
  )
})

test_that("get_p_value warns in case of zero p-value", {
  expect_snapshot(
    res_ <- get_p_value(gss_calc, obs_stat = -10, direction = "left")
  )
})

test_that("get_p_value throws error in case of `NaN` stat", {
  gss_calc$stat[1] <- NaN
  expect_snapshot(error = TRUE, res_ <- get_p_value(gss_calc, 0, "both"))

  gss_calc$stat[2] <- NaN
  expect_snapshot(error = TRUE, res_ <- get_p_value(gss_calc, 0, "both"))

  # In the case that _all_ values are NaN, error should have different text
  gss_calc$stat <- NaN
  expect_snapshot(error = TRUE, res_ <- get_p_value(gss_calc, 0, "both"))
})

test_that("get_p_value can handle fitted objects", {
  set.seed(1)

  null_fits <- gss[1:50, ] |>
    specify(hours ~ age + college) |>
    hypothesize(null = "independence") |>
    generate(reps = 10, type = "permute") |>
    fit()

  obs_fit <- gss[1:50, ] |>
    specify(hours ~ age + college) |>
    fit()

  expect_equal(
    get_p_value(null_fits, obs_fit, "both"),
    structure(
      list(
        term = c("age", "collegedegree", "intercept"),
        p_value = c(0.6, 0.4, 0.6)
      ),
      row.names = c(NA, -3L),
      class = c("tbl_df", "tbl", "data.frame")
    ),
    ignore_attr = TRUE
  )

  # errors out when it ought to
  obs_fit_2 <- gss[1:50, ] |>
    specify(hours ~ age) |>
    fit()

  expect_snapshot(error = TRUE, get_p_value(null_fits, obs_fit_2, "both"))

  obs_fit_3 <- gss[1:50, ] |>
    specify(year ~ age + college) |>
    fit()

  expect_snapshot(error = TRUE, get_p_value(null_fits, obs_fit_3, "both"))

  set.seed(1)

  null_fits_4 <- gss[1:50, ] |>
    specify(hours ~ age) |>
    hypothesize(null = "independence") |>
    generate(reps = 10, type = "permute") |>
    fit()

  obs_fit_4 <- gss[1:50, ] |>
    specify(hours ~ age) |>
    fit()

  obs_fit_4

  expect_equal(
    get_p_value(null_fits_4, obs_fit_4, "both"),
    structure(
      list(
        term = c("age", "intercept"),
        p_value = c(0.6, 0.6)
      ),
      row.names = c(NA, -2L),
      class = c("tbl_df", "tbl", "data.frame")
    ),
    ignore_attr = TRUE
  )

  expect_equal(ncol(null_fits_4), ncol(obs_fit_4) + 1)
  expect_equal(nrow(null_fits_4), nrow(obs_fit_4) * 10)

  expect_equal(ncol(obs_fit_4), ncol(obs_fit))
  expect_equal(nrow(obs_fit_4), nrow(obs_fit) - 1)

  expect_true(is_fitted(obs_fit))
  expect_true(is_fitted(obs_fit_2))
  expect_true(is_fitted(obs_fit_3))
  expect_true(is_fitted(obs_fit_4))

  expect_true(is_fitted(null_fits))
  expect_true(is_fitted(null_fits_4))
})

test_that("get_p_value can handle bad args with fitted objects", {
  set.seed(1)

  null_fits <- gss[1:50, ] |>
    specify(hours ~ age + college) |>
    hypothesize(null = "independence") |>
    generate(reps = 10, type = "permute") |>
    fit()

  obs_fit <- gss[1:50, ] |>
    specify(hours ~ age + college) |>
    fit()

  expect_snapshot(error = TRUE, get_p_value(null_fits, "boop", "both"))

  expect_snapshot(
    error = TRUE,
    get_p_value(null_fits, obs_fit$estimate, "both")
  )

  expect_snapshot(error = TRUE, get_p_value(obs_fit, null_fits, "both"))
})

test_that("get_p_value errors informatively when args are switched", {
  # switch obs_stat and x
  obs_stat <- gss |>
    specify(response = hours) |>
    calculate(stat = "mean")

  set.seed(1)

  null_dist <- gss |>
    specify(response = hours) |>
    hypothesize(null = "point", mu = 41) |>
    generate(reps = 20, type = "bootstrap") |>
    calculate(stat = "mean")

  expect_snapshot(error = TRUE, get_p_value(obs_stat, null_dist, "both"))

  expect_silent(
    get_p_value(null_dist, obs_stat, "both")
  )
})

test_that("get_p_value can handle theoretical distributions", {
  get_p_value_ <- function(x, obs_stat, direction) {
    x <- get_p_value(x, obs_stat, direction)

    x$p_value
  }

  # f ------------------------------------------------------------
  # direction = "right" is the only valid one
  f_dist <-
    gss |>
    specify(age ~ partyid) |>
    hypothesize(null = "independence") |>
    assume(distribution = "F")

  f_obs <-
    gss |>
    specify(age ~ partyid) |>
    calculate(stat = "F")

  expect_equal(
    get_p_value_(f_dist, f_obs, direction = "right"),
    0.06005251,
    tolerance = 1e-3
  )

  old_way_f <- broom::tidy(aov(age ~ partyid, gss))

  expect_equal(
    get_p_value_(f_dist, f_obs, direction = "right"),
    old_way_f$p.value[[1]],
    tolerance = 1e-3
  )

  # t ------------------------------------------------------------
  t_dist <-
    gss |>
    specify(response = hours) |>
    hypothesize(null = "point", mu = 40) |>
    assume("t")

  t_obs <-
    gss |>
    specify(response = hours) |>
    hypothesize(null = "point", mu = 40) |>
    calculate(stat = "t")

  expect_equal(
    get_p_value_(t_dist, t_obs, direction = "both"),
    0.03755,
    tolerance = 1e-3
  )

  expect_equal(
    get_p_value_(t_dist, t_obs, direction = "left"),
    0.981,
    tolerance = 1e-3
  )

  expect_equal(
    get_p_value_(t_dist, t_obs, direction = "right"),
    1 - get_p_value_(t_dist, t_obs, direction = "left"),
    tolerance = 1e-3
  )

  expect_equal(
    get_p_value_(t_dist, t_obs, direction = "both"),
    (1 - get_p_value_(t_dist, t_obs, direction = "left")) * 2,
    tolerance = 1e-3
  )

  old_way_both <- t_test(gss, hours ~ NULL, mu = 40, alternative = "two.sided")

  expect_equal(
    old_way_both$p_value,
    get_p_value_(t_dist, t_obs, direction = "both"),
    tolerance = 1e-3
  )

  old_way_left <- t_test(gss, hours ~ NULL, mu = 40, alternative = "less")

  expect_equal(
    old_way_left$p_value,
    get_p_value_(t_dist, t_obs, direction = "left")
  )

  old_way_right <- t_test(gss, hours ~ NULL, mu = 40, alternative = "greater")

  expect_equal(
    old_way_right$p_value,
    get_p_value_(t_dist, t_obs, direction = "right")
  )

  # chisq ------------------------------------------------------------
  # direction = "right" is the only valid one
  chisq_dist <-
    gss |>
    specify(college ~ finrela) |>
    hypothesize(null = "independence") |>
    assume(distribution = "Chisq")

  chisq_obs <-
    gss |>
    specify(college ~ finrela) |>
    calculate(stat = "Chisq")

  expect_equal(
    get_p_value_(chisq_dist, chisq_obs, direction = "right"),
    1.082094e-05,
    tolerance = 1e-3
  )

  expect_snapshot(
    old_way <- chisq_test(gss, college ~ finrela)
  )

  expect_equal(
    old_way$p_value,
    get_p_value_(chisq_dist, chisq_obs, direction = "right"),
    tolerance = 1e-3
  )

  # z ------------------------------------------------------------
  z_dist <-
    gss |>
    specify(response = sex, success = "female") |>
    hypothesize(null = "point", p = .5) |>
    assume("z")

  z_obs <-
    gss |>
    specify(response = sex, success = "female") |>
    hypothesize(null = "point", p = .5) |>
    calculate(stat = "z")

  expect_equal(
    get_p_value_(z_dist, z_obs, direction = "both"),
    0.24492,
    tolerance = 1e-3
  )

  expect_equal(
    get_p_value_(z_dist, z_obs, direction = "left"),
    0.12246,
    tolerance = 1e-3
  )

  expect_equal(
    get_p_value_(z_dist, z_obs, direction = "right"),
    1 - get_p_value_(z_dist, z_obs, direction = "left"),
    tolerance = 1e-3
  )

  expect_equal(
    get_p_value_(z_dist, z_obs, direction = "both"),
    (1 - get_p_value_(z_dist, z_obs, direction = "right")) * 2,
    tolerance = 1e-3
  )

  old_way_z_both <- prop_test(
    gss,
    sex ~ NULL,
    success = "female",
    p = .5,
    alternative = "two.sided",
    z = TRUE
  )
  old_way_z_left <- prop_test(
    gss,
    sex ~ NULL,
    success = "female",
    p = .5,
    alternative = "less",
    z = TRUE
  )
  old_way_z_right <- prop_test(
    gss,
    sex ~ NULL,
    success = "female",
    p = .5,
    alternative = "greater",
    z = TRUE
  )

  expect_equal(
    get_p_value_(z_dist, z_obs, direction = "both"),
    old_way_z_both$p_value,
    tolerance = 1e-3
  )

  expect_equal(
    get_p_value_(z_dist, z_obs, direction = "left"),
    old_way_z_left$p_value,
    tolerance = 1e-3
  )

  expect_equal(
    get_p_value_(z_dist, z_obs, direction = "right"),
    old_way_z_right$p_value,
    tolerance = 1e-3
  )
})


test_that("get_p_value warns with bad theoretical distributions", {
  t_dist_40 <-
    gss |>
    specify(response = hours) |>
    hypothesize(null = "point", mu = 40) |>
    assume("t")

  t_dist_30 <-
    gss |>
    specify(response = hours) |>
    hypothesize(null = "point", mu = 30) |>
    assume("t")

  t_obs <-
    gss |>
    specify(response = hours) |>
    hypothesize(null = "point", mu = 40) |>
    calculate(stat = "t")

  expect_silent(
    get_p_value(
      t_dist_40,
      t_obs,
      direction = "both"
    )
  )

  expect_snapshot(
    res_ <- get_p_value(
      t_dist_30,
      t_obs,
      direction = "both"
    )
  )
})


================================================
FILE: tests/testthat/test-hypothesize.R
================================================
one_mean <- mtcars_df |>
  specify(response = mpg) |> # formula alt: mpg ~ NULL
  hypothesize(null = "point", mu = 25)

one_mean_specify <- mtcars_df |>
  specify(response = mpg)

one_median <- mtcars_df |>
  specify(response = mpg) |> # formula alt: mpg ~ NULL
  hypothesize(null = "point", med = 26)

one_prop <- mtcars_df |>
  specify(response = am, success = "1") |> # formula alt: am ~ NULL
  hypothesize(null = "point", p = .25)

one_prop_specify <- mtcars_df |>
  specify(response = am, success = "1")

two_props <- mtcars_df |>
  specify(am ~ vs, success = "1") |> # alt: response = am, explanatory = vs
  hypothesize(null = "independence")

gof_chisq <- mtcars_df |>
  specify(cyl ~ NULL) |> # alt: response = cyl
  hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25))

indep_chisq <- mtcars_df |>
  specify(cyl ~ am) |> # alt: response = cyl, explanatory = am
  hypothesize(null = "independence")

two_means <- mtcars_df |>
  specify(mpg ~ am) |> # alt: response = mpg, explanatory = am
  hypothesize(null = "independence")

two_medians <- mtcars_df |>
  specify(mpg ~ am) |> # alt: response = mpg, explanatory = am
  hypothesize(null = "independence")

anova_f <- mtcars_df |>
  specify(mpg ~ cyl) |> # alt: response = mpg, explanatory = cyl
  hypothesize(null = "independence")

slopes <- mtcars_df |>
  specify(mpg ~ hp) |> # alt: response = mpg, explanatory = cyl
  hypothesize(null = "independence")

test_that("auto `type` works (hypothesize)", {
  expect_equal(attr(one_mean, "type"), "bootstrap")
  expect_equal(attr(one_median, "type"), "bootstrap")
  expect_equal(attr(one_prop, "type"), "draw")
  expect_equal(attr(two_props, "type"), "permute")
  expect_equal(attr(gof_chisq, "type"), "draw")
  expect_equal(attr(indep_chisq, "type"), "permute")
  expect_equal(attr(two_means, "type"), "permute")
  expect_equal(attr(two_medians, "type"), "permute")
  expect_equal(attr(anova_f, "type"), "permute")
  expect_equal(attr(slopes, "type"), "permute")
})

test_that("hypothesize() throws an error when null is not point or independence", {
  expect_snapshot(
    error = TRUE,
    mtcars_df |>
      specify(response = mpg) |>
      hypothesize(null = "dependence")
  )
})

test_that("hypothesize() allows partial matching of null arg for point", {
  hyp_p <- mtcars_df |>
    specify(response = mpg) |>
    hypothesize(null = "po", mu = 0)
  expect_equal(attr(hyp_p, "null"), "point")
})

test_that("hypothesize() allows partial matching of null arg for independence", {
  hyp_i <- mtcars_df |>
    specify(mpg ~ vs) |>
    hypothesize(null = "i")
  expect_equal(attr(hyp_i, "null"), "independence")
})

test_that("hypothesize() throws an error when multiple null values are provided", {
  expect_snapshot(
    error = TRUE,
    mtcars_df |>
      specify(response = mpg) |>
      hypothesize(null = c("point", "independence"))
  )
})

test_that("hypothesize() throws an error when multiple params are set", {
  expect_snapshot(
    error = TRUE,
    mtcars_df |>
      specify(response = mpg) |>
      hypothesize(null = "point", mu = 25, med = 20)
  )
})

test_that("hypothesize() throws a warning when params are set with independence", {
  expect_snapshot(
    res_ <- mtcars_df |>
      specify(mpg ~ vs) |>
      hypothesize(null = "independence", mu = 25)
  )
})

test_that("hypothesize() throws a warning when params are set with paired independence", {
  expect_snapshot(
    res_ <- mtcars_df |>
      specify(response = mpg) |>
      hypothesize(null = "paired independence", mu = 25)
  )
})

test_that("hypothesize() throws an error when p is greater than 1", {
  expect_snapshot(
    error = TRUE,
    res_ <- mtcars_df |>
      specify(response = vs, success = "1") |>
      hypothesize(null = "point", p = 1 + .Machine$double.eps)
  )
})

test_that("hypothesize() throws an error when p is less than 0", {
  expect_snapshot(
    error = TRUE,
    res_ <- mtcars_df |>
      specify(response = vs, success = "1") |>
      hypothesize(null = "point", p = -.Machine$double.neg.eps)
  )
})

test_that("hypothesize() throws an error when p contains missing values", {
  expect_snapshot(
    error = TRUE,
    res_ <- mtcars_df |>
      specify(response = vs, success = "1") |>
      hypothesize(null = "point", p = c("0" = 0.5, "1" = NA_real_))
  )
})

test_that("hypothesize() throws an error when vector p does not sum to 1", {
  expect_snapshot(
    error = TRUE,
    res_ <- mtcars_df |>
      specify(response = vs, success = "1") |>
      hypothesize(null = "point", p = c("0" = 0.5, "1" = 0.5 + (eps * 2)))
  )
})

test_that("hypothesize arguments function", {
  mtcars_f <- dplyr::mutate(mtcars, cyl = factor(cyl))
  mtcars_s <- mtcars_f |> specify(response = mpg)
  matrix1 <- matrix(data = NA, nrow = 3, ncol = 3)

  expect_snapshot(error = TRUE, res_ <- hypothesize(matrix1))
  expect_snapshot(error = TRUE, res_ <- hypothesize(mtcars_s, null = NA))
  expect_snapshot(error = TRUE, res_ <- hypothesize(mtcars_s))

  expect_snapshot(
    error = TRUE,
    res_ <- mtcars_s |> hypothesize(null = "point", mean = 3)
  )

  expect_snapshot(
    error = TRUE,
    res_ <- mtcars_s |> hypothesize(null = "independence")
  )
  expect_snapshot(
    error = TRUE,
    res_ <- mtcars_s |> hypothesize(null = "point")
  )

  expect_snapshot(
    error = TRUE,
    res_ <-
      mtcars_f |>
      specify(mpg ~ am) |>
      hypothesize(null = "paired independence")
  )

  # Produces error on win-build
  expect_snapshot(
    error = TRUE,
    res <- mtcars_s |> hypothesize(null = c("point", "independence"), mu = 3)
  )

  expect_snapshot(
    error = TRUE,
    res_ <- mtcars_df |>
      dplyr::select(vs) |>
      hypothesize(null = "point", mu = 1)
  )

  expect_snapshot(
    error = TRUE,
    res_ <- mtcars_df |>
      specify(response = vs) |>
      hypothesize(null = "point", mu = 1)
  )

  expect_snapshot(
    error = TRUE,
    res_ <- mtcars_s |> hypothesize(null = "point", p = 0.2)
  )

  expect_snapshot(error = TRUE, res_ <- mtcars_s |> hypothesize())
})

test_that("params correct", {
  expect_snapshot(
    error = TRUE,
    res_ <- hypothesize(one_prop_specify, null = "point", mu = 2)
  )
  expect_snapshot(
    error = TRUE,
    res_ <- hypothesize(one_mean_specify, null = "point", mean = 0.5)
  )
})

test_that("sensible output", {
  expect_equal(class(one_mean)[1], "infer")
})

test_that("user can specify multiple explanatory variables", {
  x <-
    gss |>
    specify(hours ~ sex + college) |>
    hypothesize(null = "independence")

  expect_true(inherits(x, "infer"))
  expect_true(inherits(explanatory_variable(x), "tbl_df"))
  expect_true(inherits(explanatory_name(x), "character"))
  expect_true(inherits(explanatory_expr(x), "call"))

  expect_equal(explanatory_name(x), c("sex", "college"))
  expect_equal(response_name(x), "hours")

  expect_snapshot(
    res_ <- gss |>
      specify(hours ~ sex + college) |>
      hypothesize(null = "independence", mu = 40)
  )
})

# is_hypothesized ---------------------------------------------------------
test_that("is_hypothesized works", {
  expect_true(is_hypothesized(one_mean))
  expect_false(is_hypothesized(one_mean_specify))
})


================================================
FILE: tests/testthat/test-observe.R
================================================
test_that("observe() output is equal to core verbs", {
  expect_equal(
    gss |>
      observe(hours ~ NULL, stat = "mean"),
    gss |>
      specify(hours ~ NULL) |>
      calculate(stat = "mean")
  )

  expect_equal(
    gss |>
      observe(hours ~ NULL, stat = "t", null = "point", mu = 40),
    gss |>
      specify(hours ~ NULL) |>
      hypothesize(null = "point", mu = 40) |>
      calculate(stat = "t")
  )

  expect_equal(
    observe(
      gss,
      age ~ college,
      stat = "diff in means",
      order = c("degree", "no degree")
    ),
    gss |>
      specify(age ~ college) |>
      calculate("diff in means", order = c("degree", "no degree")),
    ignore_attr = TRUE
  )
})

test_that("observe messages/warns/errors informatively", {
  expect_equal(
    expect_message(
      gss |>
        observe(hours ~ NULL, stat = "mean", mu = 40)
    ) |>
      conditionMessage(),
    expect_message(
      gss |>
        specify(hours ~ NULL) |>
        hypothesize(null = "point", mu = 40) |>
        calculate(stat = "mean")
    ) |>
      conditionMessage()
  )

  expect_equal(
    expect_warning(
      gss |>
        observe(hours ~ NULL, stat = "t")
    ) |>
      conditionMessage(),
    expect_warning(
      gss |>
        specify(hours ~ NULL) |>
        calculate(stat = "t")
    ) |>
      conditionMessage()
  )

  expect_error(
    expect_equal(
      capture.output(
        gss |>
          observe(hours ~ age, stat = "diff in means"),
        type = "message"
      ),
      capture.output(
        gss |>
          specify(hours ~ age) |>
          calculate(stat = "diff in means"),
        type = "message"
      ),
    )
  )

  expect_error(
    expect_equal(
      gss |>
        observe(explanatory = age, stat = "diff in means"),
      gss |>
        specify(explanatory = age) |>
        calculate(stat = "diff in means")
    )
  )
})

test_that("observe() works with either specify() interface", {
  # unnamed formula argument
  expect_equal(
    gss |>
      observe(hours ~ NULL, stat = "mean"),
    gss |>
      observe(response = hours, stat = "mean"),
    ignore_attr = TRUE
  )

  expect_equal(
    gss |>
      observe(
        hours ~ college,
        stat = "diff in means",
        order = c("degree", "no degree")
      ),
    gss |>
      specify(hours ~ college) |>
      calculate(stat = "diff in means", order = c("degree", "no degree"))
  )

  # named formula argument
  expect_equal(
    gss |>
      observe(formula = hours ~ NULL, stat = "mean"),
    gss |>
      observe(response = hours, stat = "mean"),
    ignore_attr = TRUE
  )

  expect_equal(
    gss |>
      observe(formula = hours ~ NULL, stat = "mean"),
    gss |>
      observe(response = hours, stat = "mean"),
    ignore_attr = TRUE
  )

  expect_equal(
    gss |>
      observe(
        formula = hours ~ college,
        stat = "diff in means",
        order = c("degree", "no degree")
      ),
    gss |>
      specify(formula = hours ~ college) |>
      calculate(stat = "diff in means", order = c("degree", "no degree"))
  )
})

test_that("observe() output is the same as the old wrappers", {
  expect_snapshot(
    res_wrap <- gss_tbl |>
      chisq_stat(college ~ partyid)
  )

  expect_equal(
    gss_tbl |>
      observe(college ~ partyid, stat = "Chisq") |>
      dplyr::pull(),
    res_wrap
  )

  expect_snapshot(
    res_wrap_2 <- gss_tbl |>
      t_stat(hours ~ sex, order = c("male", "female"))
  )

  expect_equal(
    gss_tbl |>
      observe(stat = "t", hours ~ sex, order = c("male", "female")) |>
      dplyr::pull(),
    res_wrap_2
  )
})

test_that("observe() can handle arbitrary test statistics", {
  mean_manual <-
    gss |>
    specify(response = hours) |>
    calculate(stat = "mean")

  mean_observe <-
    observe(gss, response = hours, stat = function(x, ...) {mean(x$hours)})

  # use `ignore_attr` since infer will only calculate standard errors with
  # some pre-implemented statistics
  expect_equal(mean_manual, mean_observe, ignore_attr = TRUE)
})


================================================
FILE: tests/testthat/test-print.R
================================================
test_that("print works", {
  expect_output(print(
    gss_tbl |>
      specify(age ~ hours) |>
      hypothesize(null = "independence") |>
      generate(reps = 10, type = "permute")
  ))
})

test_that("print method fits linewidth with many predictors (#543)", {
  expect_snapshot(specify(mtcars, mpg ~ cyl + disp + hp + drat + wt + qsec))
})


================================================
FILE: tests/testthat/test-rep_sample_n.R
================================================
n_population <- 5
population <- tibble::tibble(
  ball_id = 1:n_population,
  color = factor(c(rep("red", 3), rep("white", n_population - 3)))
)


# rep_sample_n ------------------------------------------------------------
test_that("`rep_sample_n` works", {
  out <- rep_sample_n(population, size = 2, reps = 5)
  expect_equal(nrow(out), 2 * 5)
  expect_equal(colnames(out), c("replicate", colnames(population)))
  expect_true(dplyr::is_grouped_df(out))
})

test_that("`rep_sample_n` checks input", {
  # `tbl`
  expect_snapshot(error = TRUE, rep_sample_n("a", size = 1))

  # `size`
  expect_snapshot(error = TRUE, rep_sample_n(population, size = "a"))
  expect_snapshot(error = TRUE, rep_sample_n(population, size = 1:2))
  expect_snapshot(error = TRUE, rep_sample_n(population, size = -1))

  # `replace`
  expect_snapshot(
    error = TRUE,
    rep_sample_n(population, size = 1, replace = "a")
  )

  # `reps`
  expect_snapshot(error = TRUE, rep_sample_n(population, size = 1, reps = "a"))
  expect_snapshot(error = TRUE, rep_sample_n(population, size = 1, reps = 1:2))
  expect_snapshot(error = TRUE, rep_sample_n(population, size = 1, reps = 0.5))

  # `prob`
  expect_snapshot(error = TRUE, rep_sample_n(population, size = 1, prob = "a"))
  expect_snapshot(
    error = TRUE,
    rep_sample_n(population, size = 1, prob = c(0.1, 0.9))
  )
})

test_that("`rep_sample_n` gives error on big sample size if `replace=FALSE`", {
  expect_snapshot(
    error = TRUE,
    rep_sample_n(population, size = n_population * 2)
  )
})

test_that("`rep_sample_n` uses `size`", {
  set.seed(1)
  out <- rep_sample_n(population, size = 2)
  expect_equal(nrow(out), 2)

  # `size = 0` is allowed following `dplyr::sample_n()`
  out <- rep_sample_n(population, size = 0)
  expect_true(nrow(out) == 0)
})

test_that("`rep_sample_n` uses `replace`", {
  set.seed(1)
  res_repl <- rep_sample_n(population, size = 5, reps = 100, replace = TRUE)

  set.seed(1)
  res_norepl <- rep_sample_n(population, size = 5, reps = 100, replace = FALSE)

  expect_true(all(res_repl[["replicate"]] == res_norepl[["replicate"]]))
  expect_false(all(res_repl[["ball_id"]] == res_norepl[["ball_id"]]))
  expect_false(all(res_repl[["color"]] == res_norepl[["color"]]))

  # Check if there are actually no duplicates in case `replace = FALSE`
  no_duplicates <- all(
    tapply(res_norepl$ball_id, res_norepl$replicate, anyDuplicated) == 0
  )
  expect_true(no_duplicates)
})

test_that("`rep_sample_n` uses `reps`", {
  set.seed(1)
  out <- rep_sample_n(population, size = 2, reps = 5)
  expect_equal(nrow(out), 2 * 5)

  # `size = 0` is allowed even with `reps > 1`
  out <- rep_sample_n(population, size = 0, reps = 10)
  expect_true(nrow(out) == 0)
})

test_that("`rep_sample_n` uses `prob`", {
  set.seed(1)
  res1 <- rep_sample_n(
    population,
    size = 5,
    reps = 100,
    replace = TRUE,
    prob = c(1, rep(0, n_population - 1))
  )

  expect_true(all(res1$ball_id == 1))
  expect_true(all(res1$color == "red"))

  # `prob` should be automatically normalized
  set.seed(1)
  res1 <- rep_sample_n(
    population,
    size = n_population,
    prob = rep(1, n_population)
  )
  set.seed(1)
  res2 <- rep_sample_n(
    population,
    size = n_population,
    prob = rep(1, n_population) / n_population
  )

  expect_equal(res1[["ball_id"]], res2[["ball_id"]])
})


# rep_slice_sample --------------------------------------------------------
test_that("`rep_slice_sample` works", {
  # By default only one row should be sampled
  out <- rep_slice_sample(population)
  expect_equal(nrow(out), 1)
  expect_equal(colnames(out), c("replicate", colnames(population)))
  expect_true(dplyr::is_grouped_df(out))

  # Using `n` argument
  out <- rep_slice_sample(population, n = 2, reps = 5)
  expect_equal(nrow(out), 2 * 5)

  # Using `prop` argument
  prop <- 2 / n_population
  out <- rep_slice_sample(population, prop = prop, reps = 5)
  expect_equal(nrow(out), 2 * 5)
})

test_that("`rep_slice_sample` checks input", {
  # `.data`
  expect_snapshot(error = TRUE, rep_slice_sample("a", n = 1))

  # `n`
  expect_snapshot(error = TRUE, rep_slice_sample(population, n = "a"))
  expect_snapshot(error = TRUE, rep_slice_sample(population, n = 1:2))
  expect_snapshot(error = TRUE, rep_slice_sample(population, n = -1))

  # `prop`
  expect_snapshot(error = TRUE, rep_slice_sample(population, prop = "a"))
  expect_snapshot(error = TRUE, rep_slice_sample(population, prop = 1:2))
  expect_snapshot(error = TRUE, rep_slice_sample(population, prop = -1))

  # Only one `n` or `prop` should be supplied
  expect_snapshot(error = TRUE, rep_slice_sample(population, n = 1, prop = 0.5))

  # `replace`
  expect_snapshot(
    error = TRUE,
    rep_slice_sample(population, n = 1, replace = "a")
  )

  # `weight_by`
  expect_snapshot(
    error = TRUE,
    rep_slice_sample(population, n = 1, weight_by = "a")
  )
  expect_snapshot(
    error = TRUE,
    rep_slice_sample(population, n = 1, weight_by = c(0.1, 0.9))
  )
  expect_snapshot(
    error = TRUE,
    rep_slice_sample(population, n = 1, weight_by = wts)
  )

  # `reps`
  expect_snapshot(error = TRUE, rep_slice_sample(population, n = 1, reps = "a"))
  expect_snapshot(error = TRUE, rep_slice_sample(population, n = 1, reps = 1:2))
  expect_snapshot(error = TRUE, rep_slice_sample(population, n = 1, reps = 0.5))
})

test_that("`rep_slice_sample` warns on big sample size if `replace = FALSE`", {
  # Using big `n`
  expect_snapshot(
    out <- rep_slice_sample(population, n = n_population * 2, reps = 1)
  )
  expect_true(nrow(out) == n_population)

  # Using big `prop`
  expect_snapshot(
    out <- rep_slice_sample(population, prop = 2, reps = 1)
  )
  expect_true(nrow(out) == n_population)
})

test_that("`rep_slice_sample` uses `n` and `prop`", {
  set.seed(1)
  res1 <- rep_slice_sample(population, n = 1)

  set.seed(1)
  res2 <- rep_slice_sample(population, prop = 1 / n_population)

  expect_equal(res1, res2)

  # Output sample size is rounded down when using `prop`
  set.seed(1)
  res3 <- rep_slice_sample(population, prop = 1.5 / n_population)

  expect_equal(res2, res3)

  # `n = 0` is allowed
  out <- rep_slice_sample(population, n = 0)
  expect_equal(nrow(out), 0)

  # `prop = 0` is allowed
  out <- rep_slice_sample(population, prop = 0)
  expect_equal(nrow(out), 0)
})

test_that("`rep_slice_sample` uses `replace`", {
  set.seed(1)
  res_repl <- rep_slice_sample(population, n = 5, reps = 100, replace = TRUE)

  set.seed(1)
  res_norepl <- rep_slice_sample(population, n = 5, reps = 100, replace = FALSE)

  expect_true(all(res_repl[["replicate"]] == res_norepl[["replicate"]]))
  expect_false(all(res_repl[["ball_id"]] == res_norepl[["ball_id"]]))
  expect_false(all(res_repl[["color"]] == res_norepl[["color"]]))

  # Check if there are actually no duplicates in case `replace = FALSE`
  no_duplicates <- all(
    tapply(res_norepl$ball_id, res_norepl$replicate, anyDuplicated) == 0
  )
  expect_true(no_duplicates)
})

test_that("`rep_slice_sample` uses `weight_by`", {
  set.seed(1)
  res1 <- rep_slice_sample(
    population,
    n = 5,
    reps = 100,
    replace = TRUE,
    weight_by = c(1, rep(0, n_population - 1))
  )

  expect_true(all(res1$ball_id == 1))
  expect_true(all(res1$color == "red"))

  # `weight_by` should be automatically normalized
  set.seed(1)
  res1 <- rep_slice_sample(
    population,
    n = n_population,
    weight_by = rep(1, n_population)
  )
  set.seed(1)
  res2 <- rep_slice_sample(
    population,
    n = n_population,
    weight_by = rep(1, n_population) / n_population
  )

  population_wt <-
    population |>
    dplyr::mutate(wts = rep(1, n_population) / n_population)
  set.seed(1)
  res3 <- rep_slice_sample(
    population_wt,
    n = n_population,
    weight_by = wts
  )

  expect_equal(res1[["ball_id"]], res2[["ball_id"]])
  expect_equal(res1[["ball_id"]], res3[["ball_id"]])
})

test_that("`rep_slice_sample` uses `reps`", {
  set.seed(1)
  out <- rep_slice_sample(population, n = 2, reps = 5)
  expect_equal(nrow(out), 2 * 5)

  # `n = 0` is allowed even with `reps > 1`
  out <- rep_slice_sample(population, n = 0, reps = 10)
  expect_true(nrow(out) == 0)

  # `prop = 0` is allowed even with `reps > 1`
  out <- rep_slice_sample(population, prop = 0, reps = 10)
  expect_true(nrow(out) == 0)
})


================================================
FILE: tests/testthat/test-shade_confidence_interval.R
================================================
# shade_confidence_interval -----------------------------------------------
test_that("shade_confidence_interval works", {
  skip_if(getRversion() < "4.1.0")

  # Adding `shade_confidence_interval()` to simulation plot
  expect_doppelganger(
    "ci-sim-fill",
    gss_viz_sim + shade_confidence_interval(c(-1, 1))
  )
  expect_doppelganger(
    "ci-sim-nofill",
    gss_viz_sim + shade_confidence_interval(c(-1, 1), fill = NULL)
  )

  # Adding `shade_confidence_interval()` to theoretical plot
  expect_doppelganger(
    "ci-theor-fill",
    gss_viz_theor + shade_confidence_interval(c(-1, 1))
  )
  expect_doppelganger(
    "ci-theor-nofill",
    gss_viz_theor + shade_confidence_interval(c(-1, 1), fill = NULL)
  )

  # Adding `shade_confidence_interval()` to "both" plot
  expect_doppelganger(
    "ci-both-fill",
    gss_viz_both + shade_confidence_interval(c(-1, 1))
  )
  expect_doppelganger(
    "ci-both-nofill",
    gss_viz_both + shade_confidence_interval(c(-1, 1), fill = NULL)
  )
})

test_that("shade_confidence_interval accepts `NULL` as `endpoints`", {
  skip_if(getRversion() < "4.1.0")

  expect_doppelganger(
    "ci-null-endpoints",
    gss_viz_sim + shade_confidence_interval(NULL)
  )
})

test_that("shade_confidence_interval uses extra aesthetic", {
  skip_if(getRversion() < "4.1.0")

  expect_doppelganger(
    "ci-extra-aes-1",
    gss_viz_sim + shade_confidence_interval(c(-1, 1), alpha = 1)
  )
  expect_doppelganger(
    "ci-extra-aes-2",
    gss_viz_sim + shade_confidence_interval(c(-1, 1), linetype = "dotted")
  )
})

test_that("shade_confidence_interval throws errors and warnings", {
  skip_if(getRversion() < "4.1.0")

  expect_snapshot(res_ <- gss_viz_sim + shade_confidence_interval(c(1, 2, 3)))
  expect_snapshot(
    error = TRUE,
    res_ <- gss_viz_sim + shade_confidence_interval(data.frame(x = 1))
  )
  expect_snapshot(
    error = TRUE,
    res_ <- gss_viz_sim + shade_confidence_interval(c(-1, 1), color = "x")
  )
  expect_snapshot(
    error = TRUE,
    res_ <- gss_viz_sim + shade_confidence_interval(c(-1, 1), fill = "x")
  )
  expect_snapshot(
    error = TRUE,
    res_ <- gss_viz_sim |> shade_confidence_interval(c(-1, 1))
  )
  expect_snapshot(
    error = TRUE,
    res_ <- gss_viz_sim |> shade_confidence_interval(endpoints = c(-1, 1))
  )
  expect_snapshot(error = TRUE, res_ <- gss_viz_sim |> shade_ci(c(-1, 1)))
  expect_snapshot(
    error = TRUE,
    res_ <- gss_viz_sim |> shade_ci(endpoints = c(-1, 1))
  )
})

# shade_ci ----------------------------------------------------------------
# Tested in `shade_confidence_interval()`


================================================
FILE: tests/testthat/test-shade_p_value.R
================================================
# shade_p_value -----------------------------------------------------------
test_that("shade_p_value works", {
  skip_if(getRversion() < "4.1.0")

  # Adding `shade_p_value()` to simulation plot
  expect_doppelganger(
    "pval-sim-right",
    gss_viz_sim + shade_p_value(1, "right")
  )
  expect_doppelganger("pval-sim-left", gss_viz_sim + shade_p_value(1, "left"))
  expect_doppelganger("pval-sim-both", gss_viz_sim + shade_p_value(1, "both"))
  expect_doppelganger("pval-sim-null", gss_viz_sim + shade_p_value(1, NULL))
  expect_warning(
    p_val_sim_corrupt <- gss_viz_sim + shade_p_value(1, "aaa"),
    "direction"
  )
  expect_doppelganger(
    "pval-sim-corrupt",
    p_val_sim_corrupt
  )

  # Adding `shade_p_value()` to theoretical plot
  expect_doppelganger(
    "pval-theor-right",
    gss_viz_theor + shade_p_value(1, "right")
  )
  expect_doppelganger(
    "pval-theor-left",
    gss_viz_theor + shade_p_value(1, "left")
  )
  expect_doppelganger(
    "pval-theor-both",
    gss_viz_theor + shade_p_value(1, "both")
  )
  expect_doppelganger(
    "pval-theor-null",
    gss_viz_theor + shade_p_value(1, NULL)
  )
  expect_warning(
    pval_theor_corrupt <- gss_viz_theor + shade_p_value(1, "aaa"),
    "direction"
  )
  expect_doppelganger(
    "pval-theor-corrupt",
    pval_theor_corrupt
  )

  # Adding `shade_p_value()` to "both" plot
  expect_doppelganger(
    "pval-both-right",
    gss_viz_both + shade_p_value(1, "right")
  )
  expect_doppelganger(
    "pval-both-left",
    gss_viz_both + shade_p_value(1, "left")
  )
  expect_doppelganger(
    "pval-both-both",
    gss_viz_both + shade_p_value(1, "both")
  )
  expect_doppelganger(
    "pval-both-null",
    gss_viz_both + shade_p_value(1, NULL)
  )
  expect_warning(
    pval_both_corrupt <- gss_viz_both + shade_p_value(1, "aaa"),
    "direction"
  )
  expect_doppelganger(
    "pval-both-corrupt",
    pval_both_corrupt
  )

  # -roper p-value shading when the calculated statistic falls exactly on the
  # boundaries of a histogram bin (#424)
  r_hat <- gss |>
    observe(
      college ~ sex,
      success = "no degree",
      stat = "ratio of props",
      order = c("female", "male")
    )

  set.seed(33)

  null_dist <- gss |>
    specify(college ~ sex, success = "no degree") |>
    hypothesize(null = "independence") |>
    generate(reps = 1000) |>
    calculate(stat = "ratio of props", order = c("female", "male"))

  expect_doppelganger(
    "pval-stat-match",
    visualize(null_dist) +
      shade_p_value(obs_stat = r_hat, direction = "two-sided")
  )
})

test_that("shade_p_value accepts synonyms for 'direction'", {
  skip_if(getRversion() < "4.1.0")

  expect_doppelganger(
    "pval-direction-right",
    gss_viz_sim + shade_p_value(1, "greater")
  )
  expect_doppelganger(
    "pval-direction-left",
    gss_viz_sim + shade_p_value(1, "less")
  )
  # This currently results into the following {vdiffr} warning:
  # "Duplicated expectations: pval-direction-both, pval-direction-both"
  # However, having same figure here as expectation is exactly the goal of tests
  expect_doppelganger(
    "pval-direction-both",
    gss_viz_sim + shade_p_value(1, "two_sided")
  )
  expect_doppelganger(
    "pval-direction-both-2",
    gss_viz_sim + shade_p_value(1, "two-sided")
  )
  expect_doppelganger(
    "pval-direction-both-3",
    gss_viz_sim + shade_p_value(1, "two sided")
  )
  expect_doppelganger(
    "pval-direction-both-4",
    gss_viz_sim + shade_p_value(1, "two.sided")
  )
})

test_that("shade_p_value uses extra aesthetic", {
  skip_if(getRversion() < "4.1.0")

  expect_doppelganger(
    "pval-extra-aes-1",
    gss_viz_sim + shade_p_value(1, "two_sided", alpha = 1)
  )
  expect_doppelganger(
    "pval-extra-aes-2",
    gss_viz_sim + shade_p_value(1, "two_sided", linetype = "dotted")
  )
  expect_doppelganger(
    "pval-extra-aes-3",
    gss_viz_sim + shade_p_value(1, "two_sided", linewidth = 4)
  )
})

test_that("shade_p_value accepts `NULL` as `obs_stat`", {
  skip_if(getRversion() < "4.1.0")

  expect_doppelganger(
    "pval-null-obs_stat",
    gss_viz_sim + shade_p_value(NULL, "left")
  )
})

test_that("shade_p_value throws errors", {
  skip_if(getRversion() < "4.1.0")

  expect_snapshot(error = TRUE, gss_viz_sim + shade_p_value("a", "right"))
  expect_snapshot(error = TRUE, gss_viz_sim + shade_p_value(1, 1))
  expect_snapshot(
    error = TRUE,
    gss_viz_sim + shade_p_value(1, "right", color = "x")
  )
  expect_snapshot(
    error = TRUE,
    gss_viz_sim + shade_p_value(1, "right", fill = "x")
  )
  expect_snapshot(error = TRUE, gss_viz_sim |> shade_p_value(1, "right"))
  expect_snapshot(error = TRUE, gss_viz_sim |> shade_p_value(obs_stat = 1))
  expect_snapshot(
    error = TRUE,
    gss_viz_sim |> shade_p_value(obs_stat = 1, direction = "right")
  )
  expect_snapshot(error = TRUE, gss_viz_sim |> shade_pvalue(1, "right"))
  expect_snapshot(error = TRUE, gss_viz_sim |> shade_pvalue(obs_stat = 1))
  expect_snapshot(
    error = TRUE,
    gss_viz_sim |> shade_pvalue(obs_stat = 1, direction = "right")
  )
})

test_that("`shade_p_value()` handles 0-area shading without issue (#528)", {
  expect_no_condition(
    zero_area_shade <- visualize(gss_permute) + shade_p_value(100, "right")
  )

  expect_doppelganger(
    "zero_area_shade",
    expect_no_condition(print(zero_area_shade)),
  )
})

# norm_direction ----------------------------------------------------------
test_that("norm_direction works", {
  skip_if(getRversion() < "4.1.0")

  expect_equal(norm_direction("left"), "left")
  expect_equal(norm_direction("less"), "left")
  expect_equal(norm_direction("right"), "right")
  expect_equal(norm_direction("greater"), "right")
  expect_equal(norm_direction("both"), "both")
  expect_equal(norm_direction("two-sided"), "both")
  expect_equal(norm_direction("two_sided"), "both")
  expect_equal(norm_direction("two sided"), "both")
  expect_equal(norm_direction("two.sided"), "both")
})


================================================
FILE: tests/testthat/test-specify.R
================================================
one_nonshift_mean <- mtcars_df |> specify(response = mpg)

one_nonshift_prop <- mtcars_df |> specify(response = am, success = "1")

two_means_boot <- mtcars_df |> specify(mpg ~ am)

two_props_boot <- mtcars_df |> specify(am ~ vs, success = "1")

slope_boot <- mtcars_df |> specify(mpg ~ hp)

test_that("auto `type` works (specify)", {
  expect_equal(attr(one_nonshift_mean, "type"), "bootstrap")
  expect_equal(attr(one_nonshift_prop, "type"), "bootstrap")
  expect_equal(attr(two_means_boot, "type"), "bootstrap")
  expect_equal(attr(two_props_boot, "type"), "bootstrap")
  expect_equal(attr(slope_boot, "type"), "bootstrap")
})

test_that("data argument", {
  expect_snapshot(error = TRUE, specify(blah ~ cyl))
  expect_snapshot(error = TRUE, specify(1:3))
  expect_s3_class(mtcars_df, "data.frame")
  expect_snapshot(error = TRUE, specify(mtcars_df, mtcars_df$mpg))
})

test_that("response and explanatory arguments", {
  expect_snapshot(error = TRUE, specify(mtcars_df, response = blah))
  expect_snapshot(error = TRUE, specify(mtcars_df, response = "blah"))
  expect_snapshot(error = TRUE, specify(mtcars_df, formula = mpg ~ blah))
  expect_snapshot(error = TRUE, specify(mtcars_df, blah2 ~ cyl))
  expect_snapshot(error = TRUE, specify(mtcars_df))
  expect_snapshot(error = TRUE, specify(mtcars_df, formula = mpg ~ mpg))
  expect_snapshot(error = TRUE, specify(mtcars_df, formula = "mpg" ~ cyl))
  expect_snapshot(error = TRUE, specify(mtcars_df, formula = mpg ~ "cyl"))
  expect_silent(specify(mtcars_df, formula = mpg ~ cyl))

  expect_snapshot(error = TRUE, specify(mtcars_df, formula = NULL ~ cyl))
})

test_that("success argument", {
  expect_snapshot(error = TRUE, specify(mtcars_df, response = vs, success = 1))
  expect_snapshot(
    error = TRUE,
    specify(mtcars_df, response = vs, success = "bogus")
  )
  expect_snapshot(
    error = TRUE,
    specify(mtcars_df, response = mpg, success = "1")
  )
  expect_snapshot(
    error = TRUE,
    specify(mtcars_df, response = cyl, success = "4")
  )
  # success not given
  expect_snapshot(error = TRUE, specify(mtcars_df, response = am))
})

test_that("sensible output", {
  expect_equal(ncol(specify(mtcars_df, formula = mpg ~ NULL)), 1)
  expect_equal(ncol(specify(mtcars_df, formula = mpg ~ wt)), 2)
  expect_equal(class(specify(mtcars_df, formula = mpg ~ wt))[1], "infer")
})

test_that("formula argument is a formula", {
  expect_snapshot(error = TRUE, specify(mtcars_df, formula = "vs", success = 1))

  # Issue #110: https://github.com/tidymodels/infer/issues/110
  expect_snapshot(error = TRUE, specify(mtcars, am, success = "1"))
  expect_snapshot(error = TRUE, specify(mtcars, response = am, "1"))
  expect_silent({
    mtcars |>
      dplyr::mutate(am = factor(am)) |>
      specify(response = am, success = "1")
  })
})

test_that("is_complete works", {
  some_missing <- data.frame(vec = c(NA, 2, 3))
  expect_snapshot(res_ <- specify(some_missing, response = vec))
})

test_that("specify doesn't have NSE issues (#256)", {
  expect_silent(specify(tibble(x = 1:10), x ~ NULL))
})

test_that("specify messages when dropping unused levels", {
  withr::local_envvar(SUPPRESS_INFER_MESSAGES = "false")
  expect_snapshot(
    res_ <- gss |>
      dplyr::filter(partyid %in% c("rep", "dem")) |>
      specify(age ~ partyid)
  )

  expect_snapshot(
    res_ <- gss |>
      dplyr::filter(partyid %in% c("rep", "dem")) |>
      specify(partyid ~ age)
  )

  expect_snapshot(
    res_ <- gss |>
      dplyr::filter(partyid %in% c("rep", "dem")) |>
      specify(partyid ~ NULL)
  )

  expect_silent(
    gss |>
      dplyr::filter(partyid %in% c("rep", "dem")) |>
      specify(age ~ NULL)
  )
})

test_that("user can specify multiple explanatory variables", {
  x <- gss |> specify(hours ~ sex + college)

  expect_true(inherits(x, "infer"))
  expect_true(inherits(explanatory_variable(x), "tbl_df"))
  expect_true(inherits(explanatory_name(x), "character"))
  expect_true(inherits(explanatory_expr(x), "call"))

  expect_equal(explanatory_name(x), c("sex", "college"))
  expect_equal(response_name(x), "hours")
})


================================================
FILE: tests/testthat/test-utils.R
================================================
test_that("append_infer_class works", {
  expect_equal(
    class(append_infer_class(structure("a", class = "b"))),
    c("infer", "b")
  )
  expect_equal(
    class(append_infer_class(structure("a", class = c("infer", "b")))),
    c("infer", "b")
  )
})

null_val <- NULL

test_that("is_single_number works", {
  # Basic usage
  expect_true(is_single_number(1))
  expect_true(is_single_number(1L))
  expect_false(is_single_number("a"))
  expect_false(is_single_number(1:2))

  # Infinity and `NA` are not allowed
  expect_false(is_single_number(Inf))
  expect_false(is_single_number(-Inf))
  expect_false(is_single_number(NA_real_))

  # Using boundaries
  expect_true(is_single_number(1, min_val = -10))
  expect_false(is_single_number(1, min_val = 10))

  expect_true(is_single_number(1, max_val = 10))
  expect_false(is_single_number(1, max_val = -10))

  expect_true(is_single_number(1, min_val = -10, max_val = 10))
  expect_false(is_single_number(1, min_val = -10, max_val = 0))
  expect_false(is_single_number(1, min_val = 10, max_val = 100))

  # Using boundary inclusivity
  ## Inclusive by default
  expect_true(is_single_number(1, min_val = 1))
  expect_true(is_single_number(1, max_val = 1))

  expect_false(is_single_number(1, min_val = 1, include_min_val = FALSE))
  expect_false(is_single_number(1, max_val = 1, include_max_val = FALSE))
})

test_that("is_truefalse works", {
  expect_true(is_truefalse(TRUE))
  expect_true(is_truefalse(FALSE))
  expect_false(is_truefalse(c(TRUE, TRUE)))
  expect_false(is_truefalse("a"))
  expect_false(is_truefalse(1L))
})

test_that("check_type works", {
  x_var <- 1L

  expect_silent(check_type(x_var, is.integer))

  expect_snapshot(error = TRUE, check_type(x_var, is.character))
  expect_snapshot(error = TRUE, check_type(x_var, is.character, "symbolic"))

  x_df <- data.frame(x = TRUE)
  expect_silent(check_type(x_df, is.data.frame))
  expect_snapshot(error = TRUE, check_type(x_df, is.logical))
})

test_that("check_type allows `NULL`", {
  input <- NULL
  expect_silent(check_type(input, is.numeric, allow_null = TRUE))
})

test_that("check_type allows custom name for `x`", {
  input <- "a"
  expect_snapshot(error = TRUE, check_type(input, is.numeric, x_name = "aaa"))
})

test_that("check_type allows extra arguments for `predicate`", {
  is_geq <- function(x, min_val) {
    x >= min_val
  }
  expect_silent(check_type(1, is_geq, min_val = 0))
  expect_snapshot(error = TRUE, check_type(1, is_geq, min_val = 2))
})

test_that("check_type allows formula `predicate`", {
  expect_silent(check_type(1, ~ is.numeric(.) && (. > 0)))

  # By default type should be inferred as the whole formula
  expect_snapshot(error = TRUE, check_type("a", ~ is.numeric(.)))
})


test_that("get_type works", {
  expect_equal(get_type(data.frame(x = 1)), "data.frame")
  expect_equal(get_type(list(x = 1)), "list")
  expect_equal(get_type(TRUE), "logical")
})

test_that("c_dedupl returns input when unnamed", {
  expect_equal(c_dedupl(c(1, 2, 3)), c(1, 2, 3))
})

test_that("hypothesize errors out when x isn't a dataframe", {
  expect_snapshot(error = TRUE, hypothesize(c(1, 2, 3), null = "point"))
})

test_that("p_null supplies appropriate params", {
  expect_equal(
    gss |> specify(partyid ~ NULL) |> p_null(),
    c(p.dem = 0.2, p.ind = 0.2, p.rep = 0.2, p.other = 0.2, p.DK = 0.2)
  )
})

test_that("variables are standardized as expected", {
  gss_types <-
    gss |>
    dplyr::mutate(
      age = as.integer(age),
      is_dem = dplyr::if_else(partyid == "dem", TRUE, FALSE),
      finrela = as.character(finrela)
    )

  gss_std <- standardize_variable_types(gss_types)

  expect_true(inherits(gss_types$age, "integer"))
  expect_true(inherits(gss_types$finrela, "character"))
  expect_true(inherits(gss_types$income, "ordered"))
  expect_true(inherits(gss_types$college, "factor"))
  expect_true(inherits(gss_types$is_dem, "logical"))

  expect_null(levels(gss_types$is_dem))

  expect_true(inherits(gss_std$age, "numeric"))
  expect_true(inherits(gss_std$finrela, "factor"))
  expect_true(inherits(gss_std$income, "factor"))
  expect_true(inherits(gss_std$college, "factor"))
  expect_true(inherits(gss_std$is_dem, "factor"))

  expect_equal(levels(gss_std$is_dem), c("TRUE", "FALSE"))
})

test_that("group_by_replicate() helper returns correct results", {
  reps <- 500
  nrow_gss <- nrow(gss)

  gss_gen <-
    gss |>
    specify(age ~ college) |>
    hypothesize(null = "independence") |>
    generate(reps = reps, type = "permute") |>
    dplyr::ungroup()

  expect_equal(
    dplyr::group_by(gss_gen, replicate),
    group_by_replicate(gss_gen, reps, nrow_gss)
  )
})


================================================
FILE: tests/testthat/test-visualize.R
================================================
library(dplyr)

set.seed(42)

hours_resamp <- gss_tbl |>
  specify(hours ~ NULL) |>
  hypothesize(null = "point", med = 3) |>
  generate(reps = 10, type = "bootstrap") |>
  calculate(stat = "median")

obs_slope <- lm(age ~ hours, data = gss_tbl) |>
  broom::tidy() |>
  dplyr::filter(term == "hours") |>
  dplyr::select(estimate) |>
  dplyr::pull()

obs_diff <- gss_tbl |>
  group_by(college) |>
  summarize(prop = mean(college == "no degree")) |>
  summarize(diff(prop)) |>
  pull()

obs_z <- sqrt(
  stats::prop.test(
    x = table(gss_tbl$college, gss_tbl$sex),
    n = nrow(gss_tbl),
    alternative = "two.sided",
    correct = FALSE
  )$statistic
)

obs_diff_mean <- gss_tbl |>
  group_by(college) |>
  summarize(mean_sepal_width = mean(hours)) |>
  summarize(diff(mean_sepal_width)) |>
  pull()

obs_t <- gss_tbl |>
  observe(hours ~ college, order = c("no degree", "degree"), stat = "t")

obs_F <- anova(
  aov(formula = hours ~ partyid, data = gss_tbl)
)$`F value`[1]

test_that("visualize warns with bad arguments", {
  skip_if(getRversion() < "4.1.0")

  # warns when supplied deprecated args in what used to be
  # a valid way
  expect_snapshot(
    res_ <- gss_tbl |>
      specify(age ~ hours) |>
      hypothesize(null = "independence") |>
      generate(reps = 100, type = "permute") |>
      calculate(stat = "slope") |>
      visualize(obs_stat = obs_slope, direction = "right")
  )

  # warning is the same when deprecated args are inappropriate
  expect_snapshot(
    res_ <- gss_tbl |>
      specify(age ~ hours) |>
      hypothesize(null = "independence") |>
      generate(reps = 100, type = "permute") |>
      calculate(stat = "slope") |>
      visualize(obs_stat = obs_slope)
  )

  # same goes for CI args
  expect_snapshot(
    res_ <- gss_tbl |>
      specify(age ~ hours) |>
      hypothesize(null = "independence") |>
      generate(reps = 100, type = "permute") |>
      calculate(stat = "slope") |>
      visualize(endpoints = c(.01, .02))
  )

  # output should not change when supplied a deprecated argument
  age_hours_df <- gss_tbl |>
    specify(age ~ hours) |>
    hypothesize(null = "independence") |>
    generate(reps = 100, type = "permute") |>
    calculate(stat = "slope")

  expect_snapshot(
    res <- age_hours_df |>
      visualize(endpoints = c(.01, .02))
  )

  expect_equal(
    age_hours_df |>
      visualize(),
    res
  )
})

test_that("visualize basic tests", {
  skip_if(getRversion() < "4.1.0")

  expect_doppelganger("visualize", visualize(hours_resamp))

  # visualise also works
  expect_doppelganger("visualise", visualise(hours_resamp))

  expect_snapshot(error = TRUE, hours_resamp |> visualize(bins = "yep"))
  expect_doppelganger(
    "vis-sim-right-1",
    gss_tbl |>
      specify(age ~ hours) |>
      hypothesize(null = "independence") |>
      generate(reps = 100, type = "permute") |>
      calculate(stat = "slope") |>
      visualize() +
      shade_p_value(obs_stat = obs_slope, direction = "right")
  )

  # obs_stat not specified
  expect_snapshot_error(
    gss_tbl |>
      specify(sex ~ college, success = "female") |>
      hypothesize(null = "independence") |>
      generate(reps = 100, type = "permute") |>
      calculate(stat = "diff in props", order = c("no degree", "degree")) |>
      visualize() +
      shade_p_value(direction = "both")
  )

  expect_doppelganger(
    "vis-sim-both-1",
    gss_tbl |>
      specify(sex ~ college, success = "female") |>
      hypothesize(null = "independence") |>
      generate(reps = 100, type = "permute") |>
      calculate(stat = "diff in props", order = c("no degree", "degree")) |>
      visualize() +
      shade_p_value(direction = "both", obs_stat = obs_diff)
  )

  expect_snapshot(
    res_vis_theor_none_1 <- gss_tbl |>
      specify(sex ~ college, success = "female") |>
      hypothesize(null = "independence") |>
      calculate(stat = "z", order = c("no degree", "degree")) |>
      visualize(method = "theoretical")
  )

  expect_doppelganger("vis-theor-none-1", res_vis_theor_none_1)

  # diff in props and z on different scales
  expect_snapshot(
    error = TRUE,
    gss_tbl |>
      specify(sex ~ college, success = "female") |>
      hypothesize(null = "independence") |>
      generate(reps = 100, type = "permute") |>
      calculate(stat = "diff in props", order = c("no degree", "degree")) |>
      visualize(method = "both") +
      shade_p_value(direction = "both", obs_stat = obs_diff)
  )

  expect_doppelganger(
    "vis-sim-none-1",
    expect_silent(
      gss_tbl |>
        specify(sex ~ college, success = "female") |>
        hypothesize(null = "independence") |>
        generate(reps = 100, type = "permute") |>
        calculate(stat = "diff in props", order = c("no degree", "degree")) |>
        visualize()
    )
  )

  expect_warning(
    vis_both_both_1 <- gss_tbl |>
      specify(sex ~ college, success = "female") |>
      hypothesize(null = "independence") |>
      generate(reps = 100, type = "permute") |>
      calculate(stat = "z", order = c("no degree", "degree")) |>
      visualize(method = "both") +
      shade_p_value(direction = "both", obs_stat = obs_z)
  )
  expect_doppelganger(
    "vis-both-both-1",
    vis_both_both_1
  )

  expect_warning(
    vis_both_both_2 <- gss_tbl |>
      specify(sex ~ college, success = "female") |>
      hypothesize(null = "independence") |>
      generate(reps = 100, type = "permute") |>
      calculate(stat = "z", order = c("degree", "no degree")) |>
      visualize(method = "both") +
      shade_p_value(direction = "both", obs_stat = -obs_z)
  )
  expect_doppelganger(
    "vis-both-both-2",
    vis_both_both_2
  )

  expect_warning(
    vis_both_left_1 <- gss_tbl |>
      specify(age ~ sex) |>
      hypothesize(null = "independence") |>
      generate(reps = 100, type = "permute") |>
      calculate(stat = "t", order = c("female", "male")) |>
      visualize(method = "both") +
      shade_p_value(direction = "left", obs_stat = obs_t)
  )
  expect_doppelganger(
    "vis-both-left-1",
    vis_both_left_1
  )

  expect_warning(
    vis_theor_left_1 <- gss_tbl |>
      specify(age ~ sex) |>
      hypothesize(null = "independence") |>
      # generate(reps = 100, type = "permute") |>
      calculate(stat = "t", order = c("female", "male")) |>
      visualize(method = "theoretical") +
      shade_p_value(direction = "left", obs_stat = obs_t)
  )
  expect_doppelganger(
    "vis-theor-left-1",
    vis_theor_left_1
  )

  expect_warning(
    vis_both_none_1 <- gss_tbl |>
      specify(hours ~ NULL) |>
      hypothesize(null = "point", mu = 1) |>
      generate(reps = 100) |>
      calculate(stat = "t") |>
      visualize(method = "both")
  )
  expect_doppelganger(
    "vis-both-none-1",
    vis_both_none_1
  )

  expect_warning(
    vis_theor_none_2 <- gss_tbl |>
      specify(age ~ college) |>
      hypothesize(null = "independence") |>
      visualize(method = "theoretical")
  )
  expect_doppelganger(
    "vis-theor-none-2",
    vis_theor_none_2
  )

  expect_warning(
    vis_theor_none_3 <- gss_tbl |>
      specify(age ~ partyid) |>
      hypothesize(null = "independence") |>
      visualize(method = "theoretical")
  )
  expect_doppelganger(
    "vis-theor-none-3",
    vis_theor_none_3
  )

  expect_warning(
    vis_both_right_1 <- gss_tbl |>
      specify(age ~ partyid) |>
      hypothesize(null = "independence") |>
      generate(reps = 100, type = "permute") |>
      calculate(stat = "F") |>
      visualize(method = "both") +
      shade_p_value(obs_stat = obs_F, direction = "right")
  )
  expect_doppelganger(
    "vis-both-right-1",
    vis_both_right_1
  )

  expect_warning(
    vis_both_left_2 <- gss_tbl |>
      specify(sex ~ college, success = "female") |>
      hypothesize(null = "independence") |>
      generate(reps = 100, type = "permute") |>
      calculate(stat = "z", order = c("no degree", "degree")) |>
      visualize(method = "both") +
      shade_p_value(direction = "left", obs_stat = obs_z)
  )
  expect_doppelganger(
    "vis-both-left-2",
    vis_both_left_2
  )

  expect_warning(
    vis_both_right_2 <- gss_tbl |>
      specify(sex ~ partyid, success = "female") |>
      hypothesize(null = "independence") |>
      generate(reps = 100, type = "permute") |>
      calculate(stat = "Chisq") |>
      visualize(method = "both") +
      shade_p_value(obs_stat = obs_F, direction = "right")
  )
  expect_doppelganger(
    "vis-both-right-2",
    vis_both_right_2
  )

  expect_warning(
    vis_theor_right_1 <- gss_tbl |>
      specify(sex ~ partyid, success = "female") |>
      hypothesize(null = "independence") |>
      # alculate(stat = "Chisq") |>
      visualize(method = "theoretical") +
      shade_p_value(obs_stat = obs_F, direction = "right")
  )
  expect_doppelganger(
    "vis-theor-right-1",
    vis_theor_right_1
  )

  expect_warning(
    vis_both_none_2 <- gss_tbl |>
      specify(partyid ~ NULL) |>
      hypothesize(
        null = "point",
        p = c("dem" = 0.4, "rep" = 0.4, "ind" = 0.2)
      ) |>
      generate(reps = 100, type = "draw") |>
      calculate(stat = "Chisq") |>
      visualize(method = "both")
  )
  expect_doppelganger(
    "vis-both-none-2",
    vis_both_none_2
  )

  # traditional instead of theoretical
  expect_snapshot(
    error = TRUE,
    gss_tbl |>
      specify(partyid ~ NULL) |>
      hypothesize(
        null = "point",
        p = c("dem" = 0.4, "rep" = 0.4, "ind" = 0.2)
      ) |>
      #       generate(reps = 100, type = "draw") |>
      #       calculate(stat = "Chisq") |>
      visualize(method = "traditional")
  )

  expect_warning(
    vis_theor_none_4 <- gss_tbl |>
      specify(partyid ~ NULL) |>
      hypothesize(
        null = "point",
        p = c("dem" = 0.4, "rep" = 0.4, "ind" = 0.2)
      ) |>
      #         generate(reps = 100, type = "draw") |>
      #         calculate(stat = "Chisq") |>
      visualize(method = "theoretical")
  )
  expect_doppelganger(
    "vis-theor-none-4",
    vis_theor_none_4
  )

  expect_doppelganger(
    "vis-sim-both-2",
    gss_tbl |>
      specify(hours ~ sex) |>
      hypothesize(null = "independence") |>
      generate(reps = 10, type = "permute") |>
      calculate(stat = "diff in means", order = c("female", "male")) |>
      visualize() +
      shade_p_value(direction = "both", obs_stat = obs_diff_mean)
  )

  # Produces warning first for not checking conditions but would also error
  expect_snapshot(
    error = TRUE,
    gss_tbl |>
      specify(hours ~ sex) |>
      hypothesize(null = "independence") |>
      generate(reps = 100, type = "permute") |>
      calculate(stat = "diff in means", order = c("female", "male")) |>
      visualize(method = "both") +
      shade_p_value(direction = "both", obs_stat = obs_diff_mean)
  )

  expect_snapshot(
    res_vis_theor_both_1 <- gss_tbl |>
      specify(hours ~ sex) |>
      hypothesize(null = "independence") |>
      generate(reps = 100, type = "permute") |>
      calculate(stat = "diff in means", order = c("female", "male")) |>
      visualize(method = "theoretical") +
      shade_p_value(direction = "both", obs_stat = obs_diff_mean)
  )

  expect_doppelganger("vis-theor-both-1", res_vis_theor_both_1)

  expect_warning(
    vis_theor_both_2 <- gss_tbl |>
      specify(sex ~ NULL, success = "female") |>
      hypothesize(null = "point", p = 0.8) |>
      #         generate(reps = 100, type = "draw") |>
      #         calculate(stat = "z") |>
      visualize(method = "theoretical") +
      shade_p_value(obs_stat = 2, direction = "both")
  )
  expect_doppelganger(
    "vis-theor-both-2",
    vis_theor_both_2
  )

  expect_doppelganger(
    "vis-sim-left-1",
    gss_tbl |>
      specify(hours ~ NULL) |>
      hypothesize(null = "point", mu = 1.3) |>
      generate(reps = 100, type = "bootstrap") |>
      calculate(stat = "mean") |>
      visualize() +
      shade_p_value(direction = "left", obs_stat = mean(gss_tbl$hours))
  )
})

test_that("mirror_obs_stat works", {
  skip_if(getRversion() < "4.1.0")

  expect_equal(mirror_obs_stat(1:10, 4), c(`60%` = 6.4))
})

test_that("obs_stat as a data.frame works", {
  skip_if(getRversion() < "4.1.0")

  mean_petal_width <- gss_tbl |>
    specify(hours ~ NULL) |>
    calculate(stat = "mean")
  expect_doppelganger(
    "df-obs_stat-1",
    gss_tbl |>
      specify(hours ~ NULL) |>
      hypothesize(null = "point", mu = 4) |>
      generate(reps = 100, type = "bootstrap") |>
      calculate(stat = "mean") |>
      visualize() +
      shade_p_value(obs_stat = mean_petal_width, direction = "both")
  )

  mean_df_test <- data.frame(x = c(4.1, 1), y = c(1, 2))
  expect_warning(
    df_obs_stat_2 <- gss_tbl |>
      specify(hours ~ NULL) |>
      hypothesize(null = "point", mu = 4) |>
      generate(reps = 100, type = "bootstrap") |>
      calculate(stat = "mean") |>
      visualize() +
      shade_p_value(obs_stat = mean_df_test, direction = "both")
  )
  expect_doppelganger(
    "df-obs_stat-2",
    df_obs_stat_2
  )
})

test_that('method = "both" behaves nicely', {
  skip_if(getRversion() < "4.1.0")

  expect_snapshot(
    error = TRUE,
    gss_tbl |>
      specify(hours ~ NULL) |>
      hypothesize(null = "point", mu = 4) |>
      generate(reps = 100, type = "bootstrap") |>
      #       calculate(stat = "mean") |>
      visualize(method = "both")
  )

  expect_snapshot(
    res_method_both <- gss_tbl |>
      specify(hours ~ college) |>
      hypothesize(null = "point", mu = 4) |>
      generate(reps = 10, type = "bootstrap") |>
      calculate(stat = "t", order = c("no degree", "degree")) |>
      visualize(method = "both")
  )

  expect_doppelganger("method-both", res_method_both)
})

test_that("Traditional right-tailed tests have warning if not right-tailed", {
  skip_if(getRversion() < "4.1.0")
  withr::local_envvar(SUPPRESS_INFER_MESSAGES = "false")

  expect_snapshot(
    res_ <- gss_tbl |>
      specify(sex ~ partyid, success = "female") |>
      hypothesize(null = "independence") |>
      generate(reps = 100, type = "permute") |>
      calculate(stat = "Chisq") |>
      visualize(method = "both") +
      shade_p_value(obs_stat = 2, direction = "left")
  )

  expect_snapshot(
    res_ <- gss_tbl |>
      specify(age ~ partyid) |>
      hypothesize(null = "independence") |>
      generate(reps = 100, type = "permute") |>
      calculate(stat = "F") |>
      visualize(method = "both") +
      shade_p_value(obs_stat = 2, direction = "two_sided")
  )

  expect_snapshot(
    res_ <- gss_tbl |>
      specify(sex ~ partyid, success = "female") |>
      hypothesize(null = "independence") |>
      #       generate(reps = 100, type = "permute") |>
      calculate(stat = "Chisq") |>
      visualize(method = "theoretical") +
      shade_p_value(obs_stat = 2, direction = "left")
  )

  expect_snapshot(
    res_ <- gss_tbl |>
      specify(age ~ partyid) |>
      hypothesize(null = "independence") |>
      #       generate(reps = 100, type = "permute") |>
      calculate(stat = "F") |>
      visualize(method = "theoretical") +
      shade_p_value(obs_stat = 2, direction = "two_sided")
  )
})

test_that("confidence interval plots are working", {
  skip_if(getRversion() < "4.1.0")

  gss_tbl_boot <- gss_tbl |>
    specify(sex ~ college, success = "female") |>
    generate(reps = 100) |>
    calculate(stat = "diff in props", order = c("no degree", "degree"))

  df_error <- tibble::tibble(col1 = rnorm(5), col2 = rnorm(5))
  vec_error <- 1:10

  perc_ci <- gss_tbl_boot |> get_ci()

  expect_snapshot(
    error = TRUE,
    res_ <- gss_tbl_boot |>
      visualize() +
      shade_confidence_interval(endpoints = df_error)
  )

  expect_snapshot(
    res_ <- gss_tbl_boot |>
      visualize() +
      shade_confidence_interval(endpoints = vec_error)
  )

  expect_snapshot(
    res_ci_vis <- gss_tbl_boot |>
      visualize() +
      shade_confidence_interval(endpoints = perc_ci, direction = "between")
  )

  expect_doppelganger("ci-vis", res_ci_vis)
})

test_that("title adapts to not hypothesis testing workflow", {
  skip_if(getRversion() < "4.1.0")

  set.seed(100)
  gss_tbl_boot_tbl <- gss_tbl |>
    specify(response = hours) |>
    generate(reps = 100, type = "bootstrap")

  expect_doppelganger(
    "vis-no-hypothesize-sim",
    gss_tbl_boot_tbl |>
      calculate(stat = "mean") |>
      visualize()
  )
  expect_snapshot(
    res_vis_no_hypothesize_both <- gss_tbl_boot_tbl |>
      calculate(stat = "t") |>
      visualize(method = "both")
  )

  expect_doppelganger("vis-no-hypothesize-both", res_vis_no_hypothesize_both)
})

test_that("warn_right_tail_test works", {
  skip_if(getRversion() < "4.1.0")

  expect_warn_right_tail <- function(stat_name) {
    expect_silent(warn_right_tail_test(NULL, stat_name))
    expect_silent(warn_right_tail_test("right", stat_name))
    expect_snapshot(warn_right_tail_test("left", stat_name))
    expect_snapshot(warn_right_tail_test("two_sided", stat_name))
  }

  expect_warn_right_tail("F")
  expect_warn_right_tail("Chi-Square")
})

test_that("visualize warns about removing `NaN`", {
  skip_if(getRversion() < "4.1.0")

  dist <- gss_tbl_boot_tbl <- gss_tbl |>
    specify(response = hours) |>
    generate(reps = 10, type = "bootstrap") |>
    calculate("mean")

  # A warning should be raised if there is NaN in a visualized dist
  dist$stat[1] <- NaN
  expect_snapshot(res_ <- visualize(dist))

  # And a different warning for plural NaNs
  dist$stat[2] <- NaN
  expect_snapshot(res_ <- visualize(dist))

  # In the case that _all_ values are NaN, error should be raised
  dist$stat <- rep(NaN, nrow(dist))
  expect_snapshot(error = TRUE, res_ <- visualize(dist))
})

test_that("visualize can handle multiple explanatory variables", {
  skip_if(getRversion() < "4.1.0")
  skip_if_not(identical(Sys.info()[["sysname"]], "Darwin"))

  # generate example objects
  null_fits <- gss |>
    specify(hours ~ age + college) |>
    hypothesize(null = "independence") |>
    generate(reps = 20, type = "permute") |>
    fit()

  obs_fit <- gss |>
    specify(hours ~ age + college) |>
    fit()

  conf_ints <-
    get_confidence_interval(
      null_fits,
      point_estimate = obs_fit,
      level = .95
    )

  # visualize with multiple panes
  expect_doppelganger(
    "viz-fit-bare",
    null_fits |>
      visualize()
  )

  # with p values shaded -- test each possible direction
  expect_doppelganger(
    "viz-fit-p-val-both",
    null_fits |>
      visualize() +
      shade_p_value(obs_stat = obs_fit, direction = "both")
  )

  expect_doppelganger(
    "viz-fit-p-val-left",
    null_fits |>
      visualize() +
      shade_p_value(obs_stat = obs_fit, direction = "left")
  )

  expect_snapshot(
    res_viz_fit_p_val_right <-
      null_fits |>
      visualize() +
      shade_p_value(obs_stat = obs_fit, direction = "right")
  )

  expect_doppelganger(
    "viz-fit-p-val-right",
    res_viz_fit_p_val_right
  )

  # with confidence intervals shaded
  expect_doppelganger(
    "viz-fit-conf-int",
    null_fits |>
      visualize() +
      shade_confidence_interval(endpoints = conf_ints)
  )

  # with no hypothesize()
  expect_doppelganger(
    "viz-fit-no-h0",
    gss |>
      specify(hours ~ age + college) |>
      generate(reps = 20, type = "bootstrap") |>
      fit() |>
      visualize()
  )

  # shade_* functions should error with bad input
})

test_that("visualize can handle `assume()` output", {
  skip_if(getRversion() < "4.1.0")

  # F ----------------------------------------------------------------------
  obs_stat <- gss |>
    specify(age ~ partyid) |>
    calculate(stat = "F")

  null_dist <- gss |>
    specify(age ~ partyid) |>
    hypothesize(null = "independence") |>
    assume(distribution = "F")

  expect_doppelganger(
    "viz-assume-f",
    visualize(null_dist)
  )

  expect_doppelganger(
    "viz-assume-f-p-val",
    visualize(null_dist) + shade_p_value(obs_stat, "right")
  )

  # t (mean) -----------------------------------------------------------------
  obs_stat <- gss |>
    specify(response = hours) |>
    hypothesize(null = "point", mu = 40) |>
    calculate(stat = "t")

  null_dist <- gss |>
    specify(response = hours) |>
    hypothesize(null = "point", mu = 40) |>
    assume("t")

  obs_mean <- gss |>
    specify(response = hours) |>
    calculate(stat = "mean")

  ci <-
    get_confidence_interval(
      null_dist,
      level = .95,
      point_estimate = obs_mean
    )

  expect_doppelganger(
    "viz-assume-t",
    visualize(null_dist)
  )

  expect_doppelganger(
    "viz-assume-t-p-val-both",
    visualize(null_dist) + shade_p_value(obs_stat, "both")
  )

  expect_doppelganger(
    "viz-assume-t-p-val-left",
    visualize(null_dist) + shade_p_value(obs_stat, "left")
  )

  expect_doppelganger(
    "viz-assume-t-p-val-right",
    visualize(null_dist) + shade_p_value(obs_stat, "right")
  )

  expect_doppelganger(
    "viz-assume-t-ci",
    visualize(null_dist) + shade_confidence_interval(ci)
  )

  # warns when it ought to --------------------------------------------------
  expect_snapshot(
    res_viz_assume_t_sim <- visualize(null_dist, method = "simulation")
  )
  expect_doppelganger(
    "viz-assume-t-sim",
    res_viz_assume_t_sim
  )

  expect_snapshot(
    res_viz_assume_t_both <- visualize(null_dist, method = "both")
  )

  expect_doppelganger(
    "viz-assume-t-both",
    res_viz_assume_t_both
  )

  # t (diff in means) -----------------------------------------------------------------
  obs_stat <- gss |>
    specify(hours ~ college) |>
    calculate(stat = "t", order = c("degree", "no degree"))

  null_dist <- gss |>
    specify(hours ~ college) |>
    hypothesize(null = "independence") |>
    assume("t")

  obs_diff <- gss |>
    specify(hours ~ college) |>
    calculate(stat = "diff in means", order = c("degree", "no degree"))

  ci <-
    get_confidence_interval(
      null_dist,
      level = .95,
      point_estimate = obs_diff
    )

  expect_doppelganger(
    "viz-assume-2t",
    visualize(null_dist)
  )

  expect_doppelganger(
    "viz-assume-2t-p-val-both",
    visualize(null_dist) + shade_p_value(obs_stat, "both")
  )

  expect_doppelganger(
    "viz-assume-2t-p-val-left",
    visualize(null_dist) + shade_p_value(obs_stat, "left")
  )

  expect_doppelganger(
    "viz-assume-2t-p-val-right",
    visualize(null_dist) + shade_p_value(obs_stat, "right")
  )

  expect_doppelganger(
    "viz-assume-2t-ci",
    visualize(null_dist) + shade_confidence_interval(ci)
  )

  # z (prop) -----------------------------------------------------------------
  obs_stat <- gss |>
    specify(response = sex, success = "female") |>
    hypothesize(null = "point", p = .5) |>
    calculate(stat = "z")

  null_dist <- gss |>
    specify(response = sex, success = "female") |>
    hypothesize(null = "point", p = .5) |>
    assume("z")

  obs_prop <- gss |>
    specify(response = sex, success = "female") |>
    calculate(stat = "prop")

  ci <-
    get_confidence_interval(
      null_dist,
      level = .95,
      point_estimate = obs_prop
    )

  expect_doppelganger(
    "viz-assume-z",
    visualize(null_dist)
  )

  expect_doppelganger(
    "viz-assume-z-p-val-both",
    visualize(null_dist) + shade_p_value(obs_stat, "both")
  )

  expect_doppelganger(
    "viz-assume-z-p-val-left",
    visualize(null_dist) + shade_p_value(obs_stat, "left")
  )

  expect_doppelganger(
    "viz-assume-z-p-val-right",
    visualize(null_dist) + shade_p_value(obs_stat, "right")
  )

  expect_doppelganger(
    "viz-assume-z-ci",
    visualize(null_dist) + shade_confidence_interval(ci)
  )

  # z (diff in props) --------------------------------------------------------
  obs_stat <- gss |>
    specify(college ~ sex, success = "no degree") |>
    calculate(stat = "z", order = c("female", "male"))

  null_dist <- gss |>
    specify(college ~ sex, success = "no degree") |>
    hypothesize(null = "independence") |>
    assume("z")

  obs_diff <- gss |>
    specify(college ~ sex, success = "no degree") |>
    calculate(stat = "diff in props", order = c("female", "male"))

  ci <-
    get_confidence_interval(
      null_dist,
      level = .95,
      point_estimate = obs_diff
    )

  expect_doppelganger(
    "viz-assume-2z",
    visualize(null_dist)
  )

  expect_doppelganger(
    "viz-assume-2z-p-val-both",
    visualize(null_dist) + shade_p_value(obs_stat, "both")
  )

  expect_doppelganger(
    "viz-assume-2z-p-val-left",
    visualize(null_dist) + shade_p_value(obs_stat, "left")
  )

  expect_doppelganger(
    "viz-assume-2z-p-val-right",
    visualize(null_dist) + shade_p_value(obs_stat, "right")
  )

  expect_doppelganger(
    "viz-assume-2z-ci",
    visualize(null_dist) + shade_confidence_interval(ci)
  )
})


================================================
FILE: tests/testthat/test-wrappers.R
================================================
test_that("t_test works", {
  # Two Sample
  expect_snapshot(res_ <- gss_tbl |> t_test(hours ~ sex))

  expect_snapshot(
    error = TRUE,
    gss_tbl |> t_test(response = "hours", explanatory = "sex")
  )

  new_way <- t_test(gss_tbl, hours ~ sex, order = c("male", "female"))
  new_way_alt <- t_test(
    gss_tbl,
    response = hours,
    explanatory = sex,
    order = c("male", "female")
  )
  old_way <- t.test(hours ~ sex, data = gss_tbl) |>
    broom::glance() |>
    dplyr::select(
      statistic,
      t_df = parameter,
      p_value = p.value,
      alternative,
      estimate,
      lower_ci = conf.low,
      upper_ci = conf.high
    )

  expect_equal(new_way, new_way_alt, tolerance = 1e-5)
  expect_equal(new_way, old_way, tolerance = 1e-5)

  # check that the order argument changes output
  new_way2 <- t_test(gss_tbl, hours ~ sex, order = c("female", "male"))
  expect_equal(new_way[["lower_ci"]], -new_way2[["upper_ci"]])
  expect_equal(new_way[["statistic"]], -new_way2[["statistic"]])

  # One Sample
  new_way <- gss_tbl |>
    t_test(hours ~ NULL, mu = 0)
  new_way_alt <- gss_tbl |>
    t_test(response = hours, mu = 0)
  old_way <- t.test(x = gss_tbl$hours, mu = 0) |>
    broom::glance() |>
    dplyr::select(
      statistic,
      t_df = parameter,
      p_value = p.value,
      alternative,
      estimate,
      lower_ci = conf.low,
      upper_ci = conf.high
    )

  expect_equal(new_way, new_way_alt, tolerance = 1e-5)
  expect_equal(new_way, old_way, tolerance = 1e-5)
})

test_that("chisq_test works", {
  # maleependence
  expect_silent(
    gss_tbl |>
      chisq_test(college ~ partyid)
  )
  new_way <- gss_tbl |>
    chisq_test(college ~ partyid)
  new_way_alt <- gss_tbl |>
    chisq_test(response = college, explanatory = partyid)
  old_way <- chisq.test(x = table(gss_tbl$partyid, gss_tbl$college)) |>
    broom::glance() |>
    dplyr::select(statistic, chisq_df = parameter, p_value = p.value)

  expect_equal(new_way, new_way_alt, tolerance = eps)
  expect_equal(new_way, old_way, tolerance = eps)

  # Goodness of Fit
  expect_silent(
    gss_tbl |>
      chisq_test(response = partyid, p = c(.3, .4, .3))
  )
  new_way <- gss_tbl |>
    chisq_test(partyid ~ NULL, p = c(.3, .4, .3))
  new_way_alt <- gss_tbl |>
    chisq_test(response = partyid, p = c(.3, .4, .3))
  old_way <- chisq.test(x = table(gss_tbl$partyid), p = c(.3, .4, .3)) |>
    broom::glance() |>
    dplyr::select(statistic, chisq_df = parameter, p_value = p.value)

  expect_equal(new_way, new_way_alt, tolerance = 1e-5)
  expect_equal(new_way, old_way, tolerance = 1e-5)

  # check that function errors out when response is numeric
  expect_snapshot(
    error = TRUE,
    chisq_test(x = gss_tbl, response = age, explanatory = partyid)
  )

  # check that function errors out when explanatory is numeric
  expect_snapshot(
    error = TRUE,
    chisq_test(x = gss_tbl, response = partyid, explanatory = age)
  )
})

test_that("_stat functions work", {
  # Test of maleependence
  expect_snapshot(
    res_ <- gss_tbl |> chisq_stat(college ~ partyid)
  )

  another_way <- gss_tbl |>
    chisq_test(college ~ partyid) |>
    dplyr::select(statistic)

  expect_snapshot(
    obs_stat_way <- gss_tbl |> chisq_stat(college ~ partyid)
  )
  one_more <- chisq.test(
    table(gss_tbl$partyid, gss_tbl$college)
  )$statistic

  expect_equal(dplyr::pull(another_way), obs_stat_way, ignore_attr = TRUE)
  expect_equal(one_more, obs_stat_way, ignore_attr = TRUE)

  # Goodness of Fit
  new_way <- gss_tbl |>
    chisq_test(partyid ~ NULL) |>
    dplyr::select(statistic)

  expect_snapshot(
    obs_stat_way <- gss_tbl |>
      chisq_stat(partyid ~ NULL)
  )
  expect_snapshot(
    obs_stat_way_alt <- gss_tbl |>
      chisq_stat(response = partyid)
  )

  expect_equal(dplyr::pull(new_way), obs_stat_way, ignore_attr = TRUE)
  expect_equal(dplyr::pull(new_way), obs_stat_way_alt, ignore_attr = TRUE)

  # robust to the named vector
  unordered_p <- gss_tbl |>
    chisq_test(response = partyid, p = c(.2, .3, .5))
  ordered_p <- gss_tbl |>
    chisq_test(response = partyid, p = c(ind = .2, rep = .3, dem = .5))

  expect_equal(unordered_p, ordered_p, ignore_attr = TRUE)

  # Two sample t
  expect_snapshot(
    res_ <- gss_tbl |>
      t_stat(
        hours ~ sex,
        order = c("male", "female")
      )
  )
  another_way <- gss_tbl |>
    t_test(hours ~ sex, order = c("male", "female")) |>
    dplyr::select(statistic) |>
    pull()

  expect_snapshot(
    obs_stat_way <- gss_tbl |>
      t_stat(hours ~ sex, order = c("male", "female"))
  )

  expect_snapshot(
    obs_stat_way_alt <- gss_tbl |>
      t_stat(response = hours, explanatory = sex, order = c("male", "female"))
  )

  expect_equal(another_way, obs_stat_way, ignore_attr = TRUE)
  expect_equal(another_way, obs_stat_way_alt, ignore_attr = TRUE)

  # One sample t
  expect_snapshot(
    res_ <- gss_tbl |> t_stat(hours ~ NULL)
  )

  another_way <- gss_tbl |>
    t_test(hours ~ NULL) |>
    dplyr::select(statistic) |>
    pull()

  expect_snapshot(
    obs_stat_way <- gss_tbl |>
      t_stat(hours ~ NULL)
  )
  expect_snapshot(
    obs_stat_way_alt <- gss_tbl |>
      t_stat(response = hours)
  )

  expect_equal(another_way, obs_stat_way, ignore_attr = TRUE)
  expect_equal(another_way, obs_stat_way_alt, ignore_attr = TRUE)

  expect_snapshot(
    error = TRUE,
    res_ <- chisq_stat(x = gss_tbl, response = age, explanatory = sex)
  )

  expect_snapshot(
    error = TRUE,
    res_ <- chisq_stat(x = gss_tbl, response = sex, explanatory = age)
  )
})

test_that("conf_int argument works", {
  expect_equal(
    names(
      gss_tbl |>
        t_test(hours ~ sex, order = c("male", "female"), conf_int = FALSE)
    ),
    c("statistic", "t_df", "p_value", "alternative", "estimate"),
    tolerance = 1e-5
  )
  expect_equal(
    names(
      gss_tbl |>
        t_test(
          hours ~ sex,
          order = c("male", "female"),
          conf_int = TRUE
        )
    ),
    c(
      "statistic",
      "t_df",
      "p_value",
      "alternative",
      "estimate",
      "lower_ci",
      "upper_ci"
    ),
    tolerance = 1e-5
  )

  ci_test <- gss_tbl |>
    t_test(
      hours ~ sex,
      order = c("male", "female"),
      conf_int = TRUE,
      conf_level = 0.9
    )
  old_way <- t.test(
    formula = hours ~ sex,
    data = gss_tbl,
    conf.level = 0.9
  )[["conf.int"]]
  expect_equal(ci_test$lower_ci[1], old_way[1], tolerance = 1e-5)
  expect_equal(ci_test$upper_ci[1], old_way[2], tolerance = 1e-5)

  expect_snapshot(
    error = TRUE,
    res_ <- gss_tbl |>
      t_test(
        hours ~ sex,
        order = c("female", "male"),
        conf_int = TRUE,
        conf_level = 1.1
      )
  )

  # Check that var.equal produces different results
  # Thanks for fmaleing this @EllaKaye!
  gss_tbl_small <- gss_tbl |> dplyr::slice(1:6, 90:100)

  expect_snapshot(
    no_var_equal <- gss_tbl_small |>
      t_stat(hours ~ sex, order = c("female", "male"))
  )

  expect_snapshot(
    var_equal <- gss_tbl_small |>
      t_stat(
        hours ~ sex,
        order = c("female", "male"),
        var.equal = TRUE
      )
  )

  expect_false(no_var_equal == var_equal)

  shortcut_no_var_equal <- gss_tbl_small |>
    specify(hours ~ sex) |>
    calculate(stat = "t", order = c("female", "male"))

  shortcut_var_equal <- gss_tbl_small |>
    specify(hours ~ sex) |>
    calculate(
      stat = "t",
      order = c("female", "male"),
      var.equal = TRUE
    )
  expect_false(shortcut_no_var_equal == shortcut_var_equal)
})

# generate some data to test the prop.test wrapper
df <- data.frame(
  exp = rep(c("a", "b"), each = 500),
  resp = c(rep("c", 450), rep("d", 50), rep("c", 400), rep("d", 100)),
  stringsAsFactors = FALSE
)

sum_df <- table(df)

bad_df <- data.frame(resp = 1:5, exp = letters[1:5])

bad_df2 <- data.frame(resp = letters[1:5], exp = 1:5)

df_l <- df |>
  dplyr::mutate(resp = dplyr::if_else(resp == "c", TRUE, FALSE))

test_that("two sample prop_test works", {
  # run the tests with default args
  base <- prop.test(sum_df)
  infer <- prop_test(df, resp ~ exp, order = c("a", "b"))

  # check that results are same
  expect_equal(base[["statistic"]], infer[["statistic"]], tolerance = .001)
  expect_equal(base[["parameter"]], infer[["chisq_df"]])
  expect_equal(base[["p.value"]], infer[["p_value"]], tolerance = .001)

  # expect warning for unspecified order
  expect_snapshot(res_ <- prop_test(df, resp ~ exp))

  # check that the functions respond to "p" in the same way
  base2 <- prop.test(sum_df, p = c(.1, .1))
  infer2 <- prop_test(df, resp ~ exp, order = c("a", "b"), p = c(.1, .1))
  expect_equal(base2[["statistic"]], infer2[["statistic"]], tolerance = .001)
  expect_equal(base2[["parameter"]], infer2[["chisq_df"]])
  expect_equal(base2[["p.value"]], infer2[["p_value"]], tolerance = .001)

  # check confidence interval argument
  infer3 <- prop_test(df, resp ~ exp, order = c("a", "b"), conf_int = TRUE)
  expect_length(infer3, 6)
  expect_length(infer2, 4)

  # check that the order argument changes output
  infer4 <- prop_test(df, resp ~ exp, order = c("b", "a"), conf_int = TRUE)
  expect_equal(infer4[["lower_ci"]], -infer3[["upper_ci"]], tolerance = .001)

  expect_snapshot(error = TRUE, res_ <- prop_test(bad_df, resp ~ exp))
  expect_snapshot(error = TRUE, res_ <- prop_test(bad_df2, resp ~ exp))

  # check that the success argument changes output
  infer5 <- prop_test(
    df,
    resp ~ exp,
    order = c("a", "b"),
    success = "d",
    conf_int = TRUE
  )
  expect_equal(infer3[["upper_ci"]], -infer5[["lower_ci"]], tolerance = .001)

  # check that logical variables are leveled intuitively
  infer1_l <- prop_test(df_l, resp ~ exp, order = c("b", "a"))
  infer2_l <- prop_test(df_l, resp ~ exp, order = c("b", "a"), success = "TRUE")
  infer3_l <- prop_test(
    df_l,
    resp ~ exp,
    order = c("b", "a"),
    success = "FALSE"
  )

  expect_equal(infer1_l$lower_ci, infer2_l$lower_ci)
  expect_equal(infer1_l$lower_ci, -infer3_l$upper_ci)
})

# ...and some data for the one sample wrapper
df_1 <- df |>
  select(resp)

sum_df_1 <- table(df_1)

test_that("one sample prop_test works", {
  # check that results with default args are the same
  base <- prop.test(sum_df_1)
  infer <- prop_test(df_1, resp ~ NULL, p = .5)
  expect_equal(base[["statistic"]], infer[["statistic"]], tolerance = .001)
  expect_equal(base[["parameter"]], infer[["chisq_df"]])
  expect_equal(base[["p.value"]], infer[["p_value"]], tolerance = .001)

  # check that the functions respond to "p" in the same way
  base2 <- prop.test(sum_df_1, p = .86)
  infer2 <- prop_test(df_1, resp ~ NULL, p = .86)
  expect_equal(base2[["statistic"]], infer2[["statistic"]], tolerance = .001)
  expect_equal(base2[["parameter"]], infer2[["chisq_df"]])
  expect_equal(base2[["p.value"]], infer2[["p_value"]], tolerance = .001)

  # expect message for unspecified p
  expect_snapshot(res_ <- prop_test(df_1, resp ~ NULL))

  # check that the success argument changes output
  infer3 <- prop_test(df_1, resp ~ NULL, p = .2, success = "c")
  infer4 <- prop_test(df_1, resp ~ NULL, p = .8, success = "d")
  expect_equal(infer3[["chisq_df"]], infer4[["chisq_df"]], tolerance = .001)
  expect_snapshot(
    error = TRUE,
    res_ <- prop_test(df_1, resp ~ NULL, p = .2, success = "b")
  )
})

test_that("prop_test output dimensionality is correct", {
  infer_1_sample <- prop_test(df, resp ~ NULL, p = .5)
  infer_1_sample_z <- prop_test(df, resp ~ NULL, p = .5, z = TRUE)
  infer_2_sample <- prop_test(df, resp ~ exp, order = c("a", "b"))
  infer_2_sample_no_int <- prop_test(
    df,
    resp ~ exp,
    order = c("a", "b"),
    conf_int = FALSE
  )
  infer_2_sample_z <- prop_test(df, resp ~ exp, order = c("a", "b"), z = TRUE)

  expect_length(infer_1_sample, 4)
  expect_length(infer_1_sample, length(infer_1_sample_z) + 1)
  expect_length(infer_2_sample, 6)
  expect_length(infer_2_sample_no_int, 4)
  expect_length(infer_2_sample_z, length(infer_2_sample) - 1)
})

test_that("prop_test handles >2 explanatory levels gracefully", {
  set.seed(1)
  dfr <-
    tibble::tibble(
      exp = sample(c("a", "b", "c"), 100, replace = TRUE),
      resp = sample(c("d", "e"), 100, replace = TRUE)
    )

  res_old <- prop.test(table(dfr))

  # don't pass order
  expect_silent(
    res_1 <- prop_test(dfr, resp ~ exp)
  )

  # pass 2-length order
  expect_snapshot(
    res_2 <- prop_test(dfr, resp ~ exp, order = c("a", "b"))
  )

  # pass 3-length order
  expect_snapshot(
    res_3 <- prop_test(dfr, resp ~ exp, order = c("a", "b", "c"))
  )

  expect_equal(res_1, res_2)
  expect_equal(res_2, res_3)

  expect_named(res_1, c("statistic", "chisq_df", "p_value"))
  expect_equal(res_1$statistic, res_old$statistic)
  expect_equal(res_1$chisq_df, res_old$parameter)
  expect_equal(res_1$p_value, res_old$p.value)
})

test_that("prop_test errors with >2 response levels", {
  set.seed(1)
  dfr <-
    tibble::tibble(
      exp = sample(c("a", "b"), 100, replace = TRUE),
      resp = sample(c("c", "d", "e"), 100, replace = TRUE)
    )

  expect_snapshot(
    error = TRUE,
    res_1 <- prop_test(dfr, resp ~ exp)
  )
})

test_that("prop_test z argument works as expected", {
  chi_res <- prop_test(df, resp ~ NULL, p = .5, correct = FALSE)

  z_res <- prop_test(df, resp ~ NULL, p = .5, z = TRUE)

  expect_equal(unname(chi_res$statistic), z_res$statistic^2, tolerance = eps)
})

test_that("wrappers can handled ordered factors", {
  expect_equal(
    gss_tbl |>
      dplyr::mutate(sex = factor(sex, ordered = FALSE)) |>
      t_test(hours ~ sex, order = c("male", "female")),
    gss_tbl |>
      dplyr::mutate(sex = factor(sex, ordered = TRUE)) |>
      t_test(hours ~ sex, order = c("male", "female"))
  )

  expect_snapshot(
    ordered_t_1 <- gss_tbl |>
      dplyr::mutate(income = factor(income, ordered = TRUE)) |>
      chisq_test(income ~ partyid)
  )

  expect_snapshot(
    ordered_f_1 <- gss_tbl |>
      dplyr::mutate(income = factor(income, ordered = FALSE)) |>
      chisq_test(income ~ partyid)
  )

  expect_equal(ordered_t_1, ordered_f_1)

  expect_snapshot(
    ordered_t_2 <- gss_tbl |>
      dplyr::mutate(income = factor(income, ordered = TRUE)) |>
      chisq_test(partyid ~ income)
  )

  expect_snapshot(
    ordered_f_2 <- gss_tbl |>
      dplyr::mutate(income = factor(income, ordered = FALSE)) |>
      chisq_test(partyid ~ income)
  )

  expect_equal(ordered_t_2, ordered_f_2)

  expect_equal(
    df |>
      dplyr::mutate(resp = factor(resp, ordered = TRUE)) |>
      prop_test(resp ~ NULL, p = .5),
    df |>
      dplyr::mutate(resp = factor(resp, ordered = FALSE)) |>
      prop_test(resp ~ NULL, p = .5)
  )
})

test_that("handles spaces in variable names (t_test)", {
  gss_ <- gss |>
    tidyr::drop_na(college) |>
    dplyr::mutate(`h o u r s` = hours)

  expect_equal(
    t_test(
      gss_,
      formula = hours ~ college,
      order = c("degree", "no degree"),
      alternative = "two-sided"
    ),
    t_test(
      gss_,
      formula = `h o u r s` ~ college,
      order = c("degree", "no degree"),
      alternative = "two-sided"
    )
  )

  expect_equal(
    t_test(
      gss_,
      response = hours,
      explanatory = college,
      order = c("degree", "no degree"),
      alternative = "two-sided"
    ),
    t_test(
      gss_,
      response = `h o u r s`,
      explanatory = college,
      order = c("degree", "no degree"),
      alternative = "two-sided"
    )
  )
})

test_that("handles spaces in variable names (prop_test)", {
  df$`r e s p` <- df$resp

  expect_equal(
    prop_test(df, `r e s p` ~ exp, order = c("a", "b")),
    prop_test(df, resp ~ exp, order = c("a", "b"))
  )

  expect_equal(
    prop_test(df, response = `r e s p`, explanatory = exp, order = c("a", "b")),
    prop_test(df, response = resp, explanatory = exp, order = c("a", "b"))
  )
})


================================================
FILE: tests/testthat.R
================================================
# This file is part of the standard setup for testthat.
# It is recommended that you do not modify it.
#
# Where should you do additional test configuration?
# Learn more about the roles of various files in:
# * https://r-pkgs.org/tests.html
# * https://testthat.r-lib.org/reference/test_package.html#special-files

library(testthat)
library(infer)

test_check("infer")


================================================
FILE: vignettes/anova.Rmd
================================================
---
title: "Tidy ANOVA (Analysis of Variance) with infer"
description: "Conducting ANOVA (Analysis of Variance) on tidy data with infer."
output: rmarkdown::html_vignette
vignette: |
  %\VignetteIndexEntry{Tidy ANOVA (Analysis of Variance) with infer}
  %\VignetteEngine{knitr::rmarkdown}
  \usepackage[utf8]{inputenc}
---

```{r}
#| label: settings
#| include: false
knitr::opts_chunk$set(fig.width = 6, fig.height = 4.5) 
options(digits = 4)
```

```{r}
#| label: load-packages
#| echo: false
#| message: false
#| warning: false
library(ggplot2)
library(dplyr)
library(infer)
```

In this vignette, we'll walk through conducting an analysis of variance (ANOVA) test using infer. ANOVAs are used to analyze differences in group means.

Throughout this vignette, we'll make use of the `gss` dataset supplied by infer, which contains a sample of data from the General Social Survey. See `?gss` for more information on the variables included and their source. Note that this data (and our examples on it) are for demonstration purposes only, and will not necessarily provide accurate estimates unless weighted properly. For these examples, let's suppose that this dataset is a representative sample of a population we want to learn about: American adults. The data looks like this:

```{r}
#| label: glimpse-gss-actual
#| warning: false
#| message: false
dplyr::glimpse(gss)
```

To carry out an ANOVA, we'll examine the association between age and political party affiliation in the United States. The `age` variable is a numerical variable measuring the respondents' age at the time that the survey was taken, and `partyid` is a factor variable with unique values `r unique(gss$partyid)`.

This is what the relationship looks like in the observed data:

```{r}
#| label: plot-f
#| echo: false
gss |>
  ggplot2::ggplot() +
  ggplot2::aes(x = partyid, y = age) +
  ggplot2::geom_boxplot() +
  ggplot2::scale_fill_brewer(type = "qual") +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, 
                                                     vjust = .5)) +
    ggplot2::labs(x = "partyid: Political Party Affiliation",
                  y = "age: Age of Respondent")
```

If there were no relationship, we would expect to see the each of these boxplots lining up along the y-axis. It looks like the average age of democrats and republicans seems to be a bit larger than independent and other American voters. Is this difference just random noise, though?

First, to calculate the observed statistic, we can use `specify()` and `calculate()`.

```{r}
#| label: calc-obs-stat-f
#| warning: false
#| message: false
# calculate the observed statistic
observed_f_statistic <- gss |>
  specify(age ~ partyid) |>
  hypothesize(null = "independence") |>
  calculate(stat = "F")
```

The observed $F$ statistic is `r observed_f_statistic`. Now, we want to compare this statistic to a null distribution, generated under the assumption that age and political party affiliation are not actually related, to get a sense of how likely it would be for us to see this observed statistic if there were actually no association between the two variables.

We can `generate()` an approximation of the null distribution using randomization. The randomization approach permutes the response and explanatory variables, so that each person's party affiliation is matched up with a random age from the sample in order to break up any association between the two.

```{r}
#| label: generate-null-f
#| warning: false
#| message: false
# generate the null distribution using randomization
null_dist <- gss |>
  specify(age ~ partyid) |>
  hypothesize(null = "independence") |>
  generate(reps = 1000, type = "permute") |>
  calculate(stat = "F")
```

Note that, in the line `specify(age ~ partyid)` above, we could use the equivalent syntax `specify(response = age, explanatory = partyid)`. 

To get a sense for what this distribution looks like, and where our observed statistic falls, we can use `visualize()`:

```{r}
#| label: visualize-f
#| warning: false
#| message: false
# visualize the null distribution and test statistic!
null_dist |>
  visualize() + 
  shade_p_value(observed_f_statistic,
                direction = "greater")
```

We could also visualize the observed statistic against the theoretical null distribution. To do so, use the `assume()` verb to define a theoretical null distribution and then pass it to `visualize()` like a null distribution outputted from `generate()` and `calculate()`.

```{r}
#| label: visualize-f-theor
#| warning: false
#| message: false
# visualize the theoretical null distribution and test statistic!
null_dist_theory <- gss |>
  specify(age ~ partyid) |>
  assume(distribution = "F")

visualize(null_dist_theory) +
  shade_p_value(observed_f_statistic,
                direction = "greater")
```

To visualize both the randomization-based and theoretical null distributions to get a sense of how the two relate, we can pipe the randomization-based null distribution into `visualize()`, and then further provide `method = "both"` to `visualize()`.

```{r}
#| label: visualize-indep-both
#| warning: false
#| message: false
# visualize both null distributions and the test statistic!
null_dist |>
  visualize(method = "both") + 
  shade_p_value(observed_f_statistic,
                direction = "greater")
```

Either way, it looks like our observed test statistic would be quite unlikely if there were actually no association between age and political party affiliation. More exactly, we can approximate the p-value from the randomization-based approximation to the null distribution:

```{r}
#| label: p-value-indep
#| warning: false
#| message: false
# calculate the p value from the observed statistic and null distribution
p_value <- null_dist |>
  get_p_value(obs_stat = observed_f_statistic,
              direction = "greater")

p_value
```

Thus, if there were really no relationship between age and political party affiliation, our approximation of the probability that we would see a statistic as or more extreme than `r observed_f_statistic` is approximately `r p_value`.

To calculate the p-value using the true $F$ distribution, we can use the `pf()` function from base R. This function allows us to situate the test statistic we calculated previously in the $F$ distribution with the appropriate degrees of freedom.

```{r}
pf(observed_f_statistic$stat, 3, 496, lower.tail = FALSE)
```

Note that, while the observed statistic stays the same, the resulting p-value differs slightly between these two approaches since the randomization-based empirical $F$ distribution is an approximation of the true $F$ distribution.

The package currently does not supply a wrapper for tidy ANOVA tests.


================================================
FILE: vignettes/chi_squared.Rmd
================================================
---
title: "Tidy Chi-Squared Tests with infer"
description: "Conducting Chi-Squared tests on tidy data with infer."
output: rmarkdown::html_vignette
vignette: |
  %\VignetteIndexEntry{Tidy Chi-Squared Tests with infer}
  %\VignetteEngine{knitr::rmarkdown}
  \usepackage[utf8]{inputenc}
---

```{r}
#| label: settings
#| include: false
knitr::opts_chunk$set(fig.width = 6, fig.height = 4.5) 
options(digits = 4)
```

```{r}
#| label: load-packages
#| echo: false
#| message: false
#| warning: false
library(ggplot2)
library(dplyr)
library(infer)
```

### Introduction

In this vignette, we'll walk through conducting a $\chi^2$ (chi-squared) test of independence and a chi-squared goodness of fit test using infer. We'll start out with a  chi-squared test of independence, which can be used to test the association between two categorical variables. Then, we'll move on to a chi-squared goodness of fit test, which tests how well the distribution of one categorical variable can be approximated by some theoretical distribution.

Throughout this vignette, we'll make use of the `gss` dataset supplied by infer, which contains a sample of data from the General Social Survey. See `?gss` for more information on the variables included and their source. Note that this data (and our examples on it) are for demonstration purposes only, and will not necessarily provide accurate estimates unless weighted properly. For these examples, let's suppose that this dataset is a representative sample of a population we want to learn about: American adults. The data looks like this:

```{r}
#| label: glimpse-gss-actual
#| warning: false
#| message: false
dplyr::glimpse(gss)
```

### Test of Independence

To carry out a chi-squared test of independence, we'll examine the association between income and educational attainment in the United States. `college` is a categorical variable with values `degree` and `no degree`, indicating whether or not the respondent has a college degree (including community college), and `finrela` gives the respondent's self-identification of family income---either `far below average`, `below average`, `average`, `above average`, `far above average`, or `DK` (don't know).

This is what the relationship looks like in the sample data:

```{r}
#| label: plot-indep
#| echo: false
gss |>
  ggplot2::ggplot() +
  ggplot2::aes(x = finrela, fill = college) +
  ggplot2::geom_bar(position = "fill") +
  ggplot2::scale_fill_brewer(type = "qual") +
  ggplot2::theme(axis.text.x = ggplot2::element_text(
    angle = 45,
    vjust = .5
  )) +
  ggplot2::labs(
    x = "finrela: Self-Identification of Income Class",
    y = "Proportion"
  )
```

If there were no relationship, we would expect to see the purple bars reaching to the same height, regardless of income class. Are the differences we see here, though, just due to random noise?

First, to calculate the observed statistic, we can use `specify()` and `calculate()`.

```{r}
#| label: calc-obs-stat-indep
#| warning: false
#| message: false
# calculate the observed statistic
observed_indep_statistic <- gss |>
  specify(college ~ finrela) |>
  hypothesize(null = "independence") |>
  calculate(stat = "Chisq")
```

The observed $\chi^2$ statistic is `r observed_indep_statistic`. Now, we want to compare this statistic to a null distribution, generated under the assumption that these variables are not actually related, to get a sense of how likely it would be for us to see this observed statistic if there were actually no association between education and income.

We can `generate()` the null distribution in one of two ways---using randomization or theory-based methods. The randomization approach approximates the null distribution by permuting the response and explanatory variables, so that each person's educational attainment is matched up with a random income from the sample in order to break up any association between the two.

```{r}
#| label: generate-null-indep
#| warning: false
#| message: false
# generate the null distribution using randomization
null_dist_sim <- gss |>
  specify(college ~ finrela) |>
  hypothesize(null = "independence") |>
  generate(reps = 1000, type = "permute") |>
  calculate(stat = "Chisq")
```

Note that, in the line `specify(college ~ finrela)` above, we could use the equivalent syntax `specify(response = college, explanatory = finrela)`. The same goes in the code below, which generates the null distribution using theory-based methods instead of randomization.

```{r}
#| label: generate-null-indep-t
#| warning: false
#| message: false
# generate the null distribution by theoretical approximation
null_dist_theory <- gss |>
  specify(college ~ finrela) |>
  assume(distribution = "Chisq")
```

To get a sense for what these distributions look like, and where our observed statistic falls, we can use `visualize()`:

```{r}
#| label: visualize-indep
#| warning: false
#| message: false
# visualize the null distribution and test statistic!
null_dist_sim |>
  visualize() +
  shade_p_value(observed_indep_statistic,
    direction = "greater"
  )
```

We could also visualize the observed statistic against the theoretical null distribution. To do so, use the `assume()` verb to define a theoretical null distribution and then pass it to `visualize()` like a null distribution outputted from `generate()` and `calculate()`.

```{r}
#| label: visualize-indep-theor
#| warning: false
#| message: false
# visualize the theoretical null distribution and test statistic!
gss |>
  specify(college ~ finrela) |>
  assume(distribution = "Chisq") |>
  visualize() +
  shade_p_value(observed_indep_statistic,
    direction = "greater"
  )
```

To visualize both the randomization-based and theoretical null distributions to get a sense of how the two relate, we can pipe the randomization-based null distribution into `visualize()`, and further provide `method = "both"`.

```{r}
#| label: visualize-indep-both
#| warning: false
#| message: false
# visualize both null distributions and the test statistic!
null_dist_sim |>
  visualize(method = "both") +
  shade_p_value(observed_indep_statistic,
    direction = "greater"
  )
```

Either way, it looks like our observed test statistic would be quite unlikely if there were actually no association between education and income. More exactly, we can approximate the p-value with `get_p_value`:

```{r}
#| label: p-value-indep
#| warning: false
#| message: false
# calculate the p value from the observed statistic and null distribution
p_value_independence <- null_dist_sim |>
  get_p_value(
    obs_stat = observed_indep_statistic,
    direction = "greater"
  )

p_value_independence
```

Thus, if there were really no relationship between education and income, our approximation of the probability that we would see a statistic as or more extreme than `r observed_indep_statistic` is approximately `r p_value_independence`.

To calculate the p-value using the true $\chi^2$ distribution, we can use the `pchisq` function from base R. This function allows us to situate the test statistic we calculated previously in the $\chi^2$ distribution with the appropriate degrees of freedom.

```{r}
pchisq(observed_indep_statistic$stat, 5, lower.tail = FALSE)
```

Note that, equivalently to the theory-based approach shown above, the package supplies a wrapper function, `chisq_test`, to carry out Chi-Squared tests of independence on tidy data. The syntax goes like this:

```{r}
#| label: chisq-indep-wrapper
#| message: false
#| warning: false
chisq_test(gss, college ~ finrela)
```


### Goodness of Fit

Now, moving on to a chi-squared goodness of fit test, we'll take a look at the self-identified income class of our survey respondents. Suppose our null hypothesis is that `finrela` follows a uniform distribution (i.e. there's actually an equal number of people that describe their income as far below average, below average, average, above average, far above average, or that don't know their income.) The graph below represents this hypothesis:

```{r}
#| label: gof-plot
#| echo: false
gss |>
  ggplot2::ggplot() +
  ggplot2::aes(x = finrela) +
  ggplot2::geom_bar() +
  ggplot2::geom_hline(yintercept = 466.3, col = "red") +
  ggplot2::labs(
    x = "finrela: Self-Identification of Income Class",
    y = "Number of Responses"
  )
```

It seems like a uniform distribution may not be the most appropriate description of the data--many more people describe their income as average than than any of the other options. Lets now test whether this difference in distributions is statistically significant.

First, to carry out this hypothesis test, we would calculate our observed statistic.

```{r}
#| label: observed-gof-statistic
#| warning: false
#| message: false
# calculating the null distribution
observed_gof_statistic <- gss |>
  specify(response = finrela) |>
  hypothesize(
    null = "point",
    p = c(
      "far below average" = 1 / 6,
      "below average" = 1 / 6,
      "average" = 1 / 6,
      "above average" = 1 / 6,
      "far above average" = 1 / 6,
      "DK" = 1 / 6
    )
  ) |>
  calculate(stat = "Chisq")
```

The observed statistic is `r observed_gof_statistic`. Now, generating a null distribution, by just dropping in a call to `generate()`:


```{r}
#| label: null-distribution-gof
#| warning: false
#| message: false
# generating a null distribution, assuming each income class is equally likely
null_dist_gof <- gss |>
  specify(response = finrela) |>
  hypothesize(
    null = "point",
    p = c(
      "far below average" = 1 / 6,
      "below average" = 1 / 6,
      "average" = 1 / 6,
      "above average" = 1 / 6,
      "far above average" = 1 / 6,
      "DK" = 1 / 6
    )
  ) |>
  generate(reps = 1000, type = "draw") |>
  calculate(stat = "Chisq")
```

Again, to get a sense for what these distributions look like, and where our observed statistic falls, we can use `visualize()`:

```{r}
#| label: visualize-indep-gof
#| warning: false
#| message: false
# visualize the null distribution and test statistic!
null_dist_gof |>
  visualize() +
  shade_p_value(observed_gof_statistic,
    direction = "greater"
  )
```

This statistic seems like it would be quite unlikely if income class self-identification actually followed a uniform distribution! How unlikely, though? Calculating the p-value:

```{r}
#| label: get-p-value-gof
#| warning: false
#| message: false
# calculate the p-value
p_value_gof <- null_dist_gof |>
  get_p_value(
    observed_gof_statistic,
    direction = "greater"
  )

p_value_gof
```

Thus, if each self-identified income class was equally likely to occur, our approximation of the probability that we would see a distribution like the one we did is approximately `r p_value_gof`.

To calculate the p-value using the true $\chi^2$ distribution, we can use the `pchisq` function from base R. This function allows us to situate the test statistic we calculated previously in the $\chi^2$ distribution with the appropriate degrees of freedom.

```{r}
pchisq(observed_gof_statistic$stat, 5, lower.tail = FALSE)
```

Again, equivalently to the theory-based approach shown above, the package supplies a wrapper function, `chisq_test()`, to carry out Chi-Squared goodness of fit tests on tidy data. The syntax goes like this:

```{r}
#| label: chisq-gof-wrapper
#| message: false
#| warning: false
chisq_test(
  gss,
  response = finrela,
  p = c(
    "far below average" = 1 / 6,
    "below average" = 1 / 6,
    "average" = 1 / 6,
    "above average" = 1 / 6,
    "far above average" = 1 / 6,
    "DK" = 1 / 6
  )
)
```


================================================
FILE: vignettes/infer.Rmd
================================================
---
title: "Getting to Know infer"
description: "An introduction to the infer R package."
output: 
  rmarkdown::html_vignette:
    toc: true
vignette: |
  %\VignetteIndexEntry{Getting to Know infer}
  %\VignetteEngine{knitr::rmarkdown}
  \usepackage[utf8]{inputenc}
---

```{r}
#| include: false
knitr::opts_chunk$set(fig.width = 6, fig.height = 4.5) 
options(digits = 4)
```

### Introduction

infer implements an expressive grammar to perform statistical inference that coheres with the tidyverse design framework. Rather than providing methods for specific statistical tests, this package consolidates the principles that are shared among common hypothesis tests into a set of 4 main verbs (functions), supplemented with many utilities to visualize and extract value from their outputs.

Regardless of which hypothesis test we're using, we're still asking the same kind of question: is the effect/difference in our observed data real, or due to chance? To answer this question, we start by assuming that the observed data came from some world where "nothing is going on" (i.e. the observed effect was simply due to random chance), and call this assumption our *null hypothesis*. (In reality, we might not believe in the null hypothesis at all---the null hypothesis is in opposition to the *alternate hypothesis*, which supposes that the effect present in the observed data is actually due to the fact that "something is going on.") We then calculate a *test statistic* from our data that describes the observed effect. We can use this test statistic to calculate a *p-value*, giving the probability that our observed data could come about if the null hypothesis was true. If this probability is below some pre-defined *significance level* $\alpha$, then we can reject our null hypothesis.

The workflow of this package is designed around this idea. Starting out with some dataset,

+ `specify()` allows you to specify the variable, or relationship between variables, that you're interested in.
+ `hypothesize()` allows you to declare the null hypothesis.
+ `generate()` allows you to generate data reflecting the null hypothesis.
+ `calculate()` allows you to calculate a distribution of statistics from the generated data to form the null distribution.

Throughout this vignette, we make use of `gss`, a dataset supplied by `infer` containing a sample of 500 observations of 11 variables from the *General Social Survey*. 

```{r}
#| label: load-packages
#| echo: false
#| message: false
#| warning: false
library(dplyr)
library(infer)
```


```{r}
#| label: load-gss
#| warning: false
#| message: false
# load in the dataset
data(gss)

# take a look at its structure
dplyr::glimpse(gss)
```

Each row is an individual survey response, containing some basic demographic information on the respondent as well as some additional variables. See `?gss` for more information on the variables included and their source. Note that this data (and our examples on it) are for demonstration purposes only, and will not necessarily provide accurate estimates unless weighted properly. For these examples, let's suppose that this dataset is a representative sample of a population we want to learn about: American adults.

### specify(): Specifying Response (and Explanatory) Variables

The `specify()` function can be used to specify which of the variables in the dataset you're interested in. If you're only interested in, say, the `age` of the respondents, you might write:

```{r}
#| label: specify-example
#| warning: false
#| message: false
gss |>
  specify(response = age)
```

On the front-end, the output of `specify()` just looks like it selects off the columns in the dataframe that you've specified. Checking the class of this object, though:

```{r}
#| label: specify-one
#| warning: false
#| message: false
gss |>
  specify(response = age) |>
  class()
```

We can see that the `infer` class has been appended on top of the dataframe classes--this new class stores some extra metadata.

If you're interested in two variables--`age` and `partyid`, for example--you can `specify()` their relationship in one of two (equivalent) ways:

```{r}
#| label: specify-two
#| warning: false
#| message: false
# as a formula
gss |>
  specify(age ~ partyid)

# with the named arguments
gss |>
  specify(response = age, explanatory = partyid)
```

If you're doing inference on one proportion or a difference in proportions, you will need to use the `success` argument to specify which level of your `response` variable is a success. For instance, if you're interested in the proportion of the population with a college degree, you might use the following code:

```{r}
#| label: specify-success
#| warning: false
#| message: false
# specifying for inference on proportions
gss |>
  specify(response = college, success = "degree")
```

### hypothesize(): Declaring the Null Hypothesis

The next step in the infer pipeline is often to declare a null hypothesis using `hypothesize()`. The first step is to supply one of "independence" or "point" to the `null` argument. If your null hypothesis assumes independence between two variables, then this is all you need to supply to `hypothesize()`:

```{r}
#| label: hypothesize-independence
#| warning: false
#| message: false
gss |>
  specify(college ~ partyid, success = "degree") |>
  hypothesize(null = "independence")
```

If you're doing inference on a point estimate, you will also need to provide one of `p` (the true proportion of successes, between 0 and 1), `mu` (the true mean), `med` (the true median), or `sigma` (the true standard deviation). For instance, if the null hypothesis is that the mean number of hours worked per week in our population is 40, we would write:

```{r}
#| label: hypothesize-40-hr-week
#| warning: false
#| message: false
gss |>
  specify(response = hours) |>
  hypothesize(null = "point", mu = 40)
```

Again, from the front-end, the dataframe outputted from `hypothesize()` looks almost exactly the same as it did when it came out of `specify()`, but infer now "knows" your null hypothesis.

### generate(): Generating the Null Distribution

Once we've asserted our null hypothesis using `hypothesize()`, we can construct a null distribution based on this hypothesis. We can do this using one of several methods, supplied in the `type` argument:

* `bootstrap`: A bootstrap sample will be drawn for each replicate, where a sample of size equal to the input sample size is drawn (with replacement) from the input sample data.  
* `permute`: For each replicate, each input value will be randomly reassigned (without replacement) to a new output value in the sample.  
* `draw`: A value will be sampled from a theoretical distribution with parameters specified in `hypothesize()` for each replicate. This option is currently only applicable for testing point estimates. This generation type was previously called `"simulate"`, which has been superseded.

Continuing on with our example above, about the average number of hours worked a week, we might write:

```{r}
#| label: generate-point
#| warning: false
#| message: false
set.seed(1)

gss |>
  specify(response = hours) |>
  hypothesize(null = "point", mu = 40) |>
  generate(reps = 1000, type = "bootstrap")
```

In the above example, we take 1000 bootstrap samples to form our null distribution.

Note that, before `generate()`ing, we've set the seed for random number generation with the `set.seed()` function. When using the infer package for research, or in other cases when exact reproducibility is a priority, this is good practice. infer will respect the random seed specified in the `set.seed()` function, returning the same result when `generate()`ing data given an identical seed.

To generate a null distribution for the independence of two variables, we could also randomly reshuffle the pairings of explanatory and response variables to break any existing association. For instance, to generate 1000 replicates that can be used to create a null distribution under the assumption that political party affiliation is not affected by age:

```{r}
#| label: generate-permute
#| warning: false
#| message: false
gss |>
  specify(partyid ~ age) |>
  hypothesize(null = "independence") |>
  generate(reps = 1000, type = "permute")
```

### calculate(): Calculating Summary Statistics

`calculate()` calculates summary statistics from the output of infer core functions. The function takes in a `stat` argument, which is currently one of "mean", "median", "sum", "sd", "prop", "count", "diff in means", "diff in medians", "diff in props", "Chisq", "F", "t", "z", "slope", or "correlation". For example, continuing our example above to calculate the null distribution of mean hours worked per week:

```{r}
#| label: calculate-point
#| warning: false
#| message: false
gss |>
  specify(response = hours) |>
  hypothesize(null = "point", mu = 40) |>
  generate(reps = 1000, type = "bootstrap") |>
  calculate(stat = "mean")
```

The output of `calculate()` here shows us the sample statistic (in this case, the mean) for each of our 1000 replicates. If you're carrying out inference on differences in means, medians, or proportions, or t and z statistics, you will need to supply an `order` argument, giving the order in which the explanatory variables should be subtracted. For instance, to find the difference in mean age of those that have a college degree and those that don't, we might write:

```{r}
#| label: specify-diff-in-means
#| warning: false
#| message: false
gss |>
  specify(age ~ college) |>
  hypothesize(null = "independence") |>
  generate(reps = 1000, type = "permute") |>
  calculate("diff in means", order = c("degree", "no degree"))
```

### Other Utilities

infer also offers several utilities to extract the meaning out of summary statistics and distributions---the package provides functions to visualize where a statistic is relative to a distribution (with `visualize()`), calculate p-values (with `get_p_value()`), and calculate confidence intervals (with `get_confidence_interval()`).

To illustrate, we'll go back to the example of determining whether the mean number of hours worked per week is 40 hours.

```{r}
#| label: utilities-examples
# find the point estimate
obs_mean <- gss |>
  specify(response = hours) |>
  calculate(stat = "mean")

# generate a null distribution
null_dist <- gss |>
  specify(response = hours) |>
  hypothesize(null = "point", mu = 40) |>
  generate(reps = 1000, type = "bootstrap") |>
  calculate(stat = "mean")
```

Our point estimate `r obs_mean` seems *pretty* close to 40, but a little bit different. We might wonder if this difference is just due to random chance, or if the mean number of hours worked per week in the population really isn't 40.

We could initially just visualize the null distribution.

```{r}
#| label: visualize
#| warning: false
#| message: false
null_dist |>
  visualize()
```

Where does our sample's observed statistic lie on this distribution? We can use the `obs_stat` argument to specify this.

```{r}
#| label: visualize2
#| warning: false
#| message: false
null_dist |>
  visualize() +
  shade_p_value(obs_stat = obs_mean, direction = "two-sided")
```

Notice that infer has also shaded the regions of the null distribution that are as (or more) extreme than our observed statistic. (Also, note that we now use the `+` operator to apply the `shade_p_value()` function. This is because `visualize` outputs a plot object from `ggplot2` instead of a data frame, and the `+` operator is needed to add the p-value layer to the plot object.) The red bar looks like it's slightly far out on the right tail of the null distribution, so observing a sample mean of `r obs_mean` hours would be somewhat unlikely if the mean was actually 40 hours. How unlikely, though?

```{r}
#| label: get_p_value
#| warning: false
#| message: false
# get a two-tailed p-value
p_value <- null_dist |>
  get_p_value(obs_stat = obs_mean, direction = "two-sided")

p_value
```

It looks like the p-value is `r p_value`, which is pretty small---if the true mean number of hours worked per week was actually 40, the probability of our sample mean being this far (`r abs(obs_mean-40)` hours) from 40 would be `r p_value`. This may or may not be statistically significantly different, depending on the significance level $\alpha$ you decided on *before* you ran this analysis. If you had set $\alpha = .05$, then this difference would be statistically significant, but if you had set $\alpha = .01$, then it would not be.

To get a confidence interval around our estimate, we can write:

```{r}
#| label: get_conf
#| message: false
#| warning: false
# generate a distribution like the null distribution, 
# though exclude the null hypothesis from the pipeline
boot_dist <- gss |>
  specify(response = hours) |>
  generate(reps = 1000, type = "bootstrap") |>
  calculate(stat = "mean")

# start with the bootstrap distribution
ci <- boot_dist |>
  # calculate the confidence interval around the point estimate
  get_confidence_interval(
    point_estimate = obs_mean,
    # at the 95% confidence level
    level = .95,
    # using the standard error
    type = "se"
  )

ci
```

As you can see, 40 hours per week is not contained in this interval, which aligns with our previous conclusion that this finding is significant at the confidence level $\alpha = .05$. To see this interval represented visually, we can use the `shade_confidence_interval()` utility:

```{r}
#| label: visualize-ci
#| warning: false
#| message: false
boot_dist |>
  visualize() +
  shade_confidence_interval(endpoints = ci)
```

### Theoretical Methods

infer also provides functionality to use theoretical methods for `"Chisq"`, `"F"`, `"t"` and `"z"` distributions. 

Generally, to find a null distribution using theory-based methods, use the same code that you would use to find the observed statistic elsewhere, replacing calls to `calculate()` with `assume()`. For example, to calculate the observed $t$ statistic (a standardized mean):

```{r}
#| message: false
#| warning: false
# calculate an observed t statistic
obs_t <- gss |>
  specify(response = hours) |>
  hypothesize(null = "point", mu = 40) |>
  calculate(stat = "t")
```

Then, to define a theoretical $t$ distribution, we could write:

```{r}
#| message: false
#| warning: false
# switch out calculate with assume to define a distribution
t_dist <- gss |>
  specify(response = hours) |>
  assume(distribution = "t")
```

From here, the theoretical distribution interfaces in the same way that simulation-based null distributions do. For example, to interface with p-values:

```{r}
#| message: false
#| warning: false
# visualize the theoretical null distribution
visualize(t_dist) +
  shade_p_value(obs_stat = obs_t, direction = "greater")

# more exactly, calculate the p-value
get_p_value(t_dist, obs_t, "greater")
```

Confidence intervals lie on the scale of the data rather than on the standardized scale of the theoretical distribution, so be sure to use the unstandardized observed statistic when working with confidence intervals.

```{r}
#| message: false
#| warning: false
# find the theory-based confidence interval
theor_ci <- 
  get_confidence_interval(
    x = t_dist,
    level = .95,
    point_estimate = obs_mean
  )

theor_ci
```

When visualized, the $t$ distribution will be recentered and rescaled to align with the scale of the observed data.

```{r}
# visualize the theoretical sampling distribution
visualize(t_dist) +
  shade_confidence_interval(theor_ci)
```

### Multiple regression

To accommodate randomization-based inference with multiple explanatory variables, the package implements an alternative workflow based on model fitting. Rather than `calculate()`ing statistics from resampled data, this side of the package allows you to `fit()` linear models on data resampled according to the null hypothesis, supplying model coefficients for each explanatory variable. For the most part, you can just switch out `calculate()` for `fit()` in your `calculate()`-based workflows.

As an example, suppose that we want to fit `hours` worked per week using the respondent `age` and `college` completion status. We could first begin by fitting a linear model to the observed data.

```{r}
observed_fit <- gss |>
  specify(hours ~ age + college) |>
  fit()
```

Now, to generate null distributions for each of these terms, we can fit 1000 models to resamples of the `gss` dataset, where the response `hours` is permuted in each. Note that this code is the same as the above except for the addition of the `hypothesize()` and `generate()` step.

```{r}
null_fits <- gss |>
  specify(hours ~ age + college) |>
  hypothesize(null = "independence") |>
  generate(reps = 1000, type = "permute") |>
  fit()

null_fits
```

To permute variables other than the response variable, the `variables` argument to `generate()` allows you to choose columns from the data to permute. Note that any derived effects that depend on these columns (e.g., interaction effects) will also be affected.

Beyond this point, observed fits and distributions from null fits interface exactly like analogous outputs from `calculate()`. For instance, we can use the following code to calculate a 95% confidence interval from these objects.

```{r}
get_confidence_interval(
  null_fits, 
  point_estimate = observed_fit, 
  level = .95
)
```

Or, we can shade p-values for each of these observed regression coefficients from the observed data.

```{r}
visualize(null_fits) + 
  shade_p_value(observed_fit, direction = "both")
```

### Conclusion

That's it! This vignette covers most all of the key functionality of infer. See `help(package = "infer")` for a full list of functions and vignettes.


================================================
FILE: vignettes/infer_cache/html/__packages
================================================
base
usethis
devtools
dplyr
testthat
infer


================================================
FILE: vignettes/infer_cache/html/calculate-point_94c073b633c3cf7bef3252dcad544ee2.rdb
================================================


================================================
FILE: vignettes/infer_cache/html/generate-permute_21b25928d642a97a30057306d51f1b23.rdb
================================================


================================================
FILE: vignettes/infer_cache/html/generate-point_d562524427be20dbb4736ca1ea29b04b.rdb
================================================


================================================
FILE: vignettes/infer_cache/html/hypothesize-40-hr-week_c8e33c404efa90c2ca0b2eacad95b06c.rdb
================================================


================================================
FILE: vignettes/infer_cache/html/hypothesize-independence_fe1c79b9f1dc0df488828fdd34c8145f.rdb
================================================


================================================
FILE: vignettes/infer_cache/html/specify-diff-in-means_e4103c4c3e3daedd5c1429b7a1bc8727.rdb
================================================


================================================
FILE: vignettes/infer_cache/html/specify-example_3ea3cfa390233b127dc25b05b0354bcf.rdb
================================================


================================================
FILE: vignettes/infer_cache/html/specify-one_149be66261b0606b7ddb80efd10fa81d.rdb
================================================


================================================
FILE: vignettes/infer_cache/html/specify-success_e8eb15e9f621ccf60cb6527a6bccdb4b.rdb
================================================


================================================
FILE: vignettes/infer_cache/html/specify-two_20085531c110a936ee691162f225333b.rdb
================================================


================================================
FILE: vignettes/observed_stat_examples.Rmd
================================================
---
title: "Full infer Pipeline Examples"
description: "A near-exhaustive demonstration of the functionality in infer."
output: 
  rmarkdown::html_vignette:
    df_print: kable
    toc: true
vignette: |
  %\VignetteIndexEntry{Full infer Pipeline Examples}
  %\VignetteEngine{knitr::rmarkdown}
  \usepackage[utf8]{inputenc}
---

#### Introduction

```{r}
#| include: false
knitr::opts_chunk$set(fig.width = 6, fig.height = 4.5, 
                      message = FALSE, warning = FALSE) 
options(digits = 4)
```

This vignette is intended to provide a set of examples that nearly exhaustively demonstrate the functionalities provided by infer. Commentary on these examples is limited---for more discussion of the intuition behind the package, see the "Getting to Know infer" vignette, accessible by calling `vignette("infer")`.

Throughout this vignette, we'll make use of the `gss` dataset supplied by infer, which contains a sample of data from the General Social Survey. See `?gss` for more information on the variables included and their source. Note that this data (and our examples on it) are for demonstration purposes only, and will not necessarily provide accurate estimates unless weighted properly. For these examples, let's suppose that this dataset is a representative sample of a population we want to learn about: American adults. The data looks like this:

```{r}
#| label: load-packages
#| echo: false
library(dplyr)
library(infer)
```

```{r}
#| label: load-gss
# load in the dataset
data(gss)

# take a look at its structure
dplyr::glimpse(gss)
```

## Hypothesis tests

### One numerical variable (mean)

Calculating the observed statistic,

```{r}
x_bar <- gss |>
  specify(response = hours) |>
  calculate(stat = "mean")
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
x_bar <- gss |>
  observe(response = hours, stat = "mean")
```

Then, generating the null distribution,

```{r}
null_dist <- gss |>
  specify(response = hours) |>
  hypothesize(null = "point", mu = 40) |>
  generate(reps = 1000) |>
  calculate(stat = "mean")
```

Visualizing the observed statistic alongside the null distribution,

```{r}
visualize(null_dist) +
  shade_p_value(obs_stat = x_bar, direction = "two-sided")
```

Calculating the p-value from the null distribution and observed statistic,

```{r}
null_dist |>
  get_p_value(obs_stat = x_bar, direction = "two-sided")
```

### One numerical variable (standardized mean $t$)

Calculating the observed statistic,

```{r}
t_bar <- gss |>
  specify(response = hours) |>
  hypothesize(null = "point", mu = 40) |>
  calculate(stat = "t")
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
t_bar <- gss |>
  observe(response = hours, null = "point", mu = 40, stat = "t")
```

Then, generating the null distribution,

```{r}
null_dist <- gss |>
  specify(response = hours) |>
  hypothesize(null = "point", mu = 40) |>
  generate(reps = 1000) |>
  calculate(stat = "t")
```

Alternatively, finding the null distribution using theoretical methods using the `assume()` verb,

```{r}
null_dist_theory <- gss |>
  specify(response = hours)  |>
  assume("t")
```

Visualizing the observed statistic alongside the null distribution,

```{r}
visualize(null_dist) +
  shade_p_value(obs_stat = t_bar, direction = "two-sided")
```

Alternatively, visualizing the observed statistic using the theory-based null distribution,

```{r}
visualize(null_dist_theory) +
  shade_p_value(obs_stat = t_bar, direction = "two-sided")
```

Alternatively, visualizing the observed statistic using both of the null distributions,

```{r}
visualize(null_dist, method = "both") +
  shade_p_value(obs_stat = t_bar, direction = "two-sided")
```

Note that the above code makes use of the randomization-based null distribution.

Calculating the p-value from the null distribution and observed statistic,

```{r}
null_dist |>
  get_p_value(obs_stat = t_bar, direction = "two-sided")
```

Alternatively, using the `t_test()` wrapper:

```{r}
gss |>
  t_test(response = hours, mu = 40)
```

`infer` does not support testing on one numerical variable via the `z` distribution.

### One numerical variable (median)

Calculating the observed statistic,

```{r}
x_tilde <- gss |>
  specify(response = age) |>
  calculate(stat = "median")
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
x_tilde <- gss |>
  observe(response = age, stat = "median")
```

Then, generating the null distribution,

```{r}
null_dist <- gss |>
  specify(response = age) |>
  hypothesize(null = "point", med = 40) |> 
  generate(reps = 1000) |> 
  calculate(stat = "median")
```

Visualizing the observed statistic alongside the null distribution,

```{r}
visualize(null_dist) +
  shade_p_value(obs_stat = x_tilde, direction = "two-sided")
```

Calculating the p-value from the null distribution and observed statistic,

```{r}
null_dist |>
  get_p_value(obs_stat = x_tilde, direction = "two-sided")
```

### One numerical variable (paired)

The example under this header is compatible with `stat`s `"mean"`, `"median"`, `"sum"`, and `"sd"`.

Suppose that each of these survey respondents had provided the number of `hours` worked per week when surveyed 5 years prior, encoded as `hours_previous`. 

```{r}
set.seed(1)

gss_paired <- gss |>
   mutate(
      hours_previous = hours + 5 - rpois(nrow(gss), 4.8),
      diff = hours - hours_previous
   )

gss_paired |>
   select(hours, hours_previous, diff)
```

We'd like to test the null hypothesis that the `"mean"` hours worked per week did not change between the sampled time and five years prior.

infer supports paired hypothesis testing via the `null = "paired independence"` argument to `hypothesize()`.

Calculating the observed statistic,

```{r}
x_tilde <- gss_paired |>
  specify(response = diff) |>
  calculate(stat = "mean")
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
x_tilde <- gss_paired |>
  observe(response = diff, stat = "mean")
```

Then, generating the null distribution,

```{r}
null_dist <- gss_paired |>
  specify(response = diff) |>
  hypothesize(null = "paired independence") |> 
  generate(reps = 1000, type = "permute") |> 
  calculate(stat = "mean")
```

Note that the `diff` column itself is not permuted, but rather the signs of the values in the column.

Visualizing the observed statistic alongside the null distribution,

```{r}
visualize(null_dist) +
  shade_p_value(obs_stat = x_tilde, direction = "two-sided")
```

Calculating the p-value from the null distribution and observed statistic,

```{r}
null_dist |>
  get_p_value(obs_stat = x_tilde, direction = "two-sided")
```

### One categorical (one proportion)

Calculating the observed statistic,

```{r}
p_hat <- gss |>
  specify(response = sex, success = "female") |>
  calculate(stat = "prop")
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
p_hat <- gss |>
  observe(response = sex, success = "female", stat = "prop")
```

Then, generating the null distribution,

```{r}
null_dist <- gss |>
  specify(response = sex, success = "female") |>
  hypothesize(null = "point", p = .5) |>
  generate(reps = 1000) |>
  calculate(stat = "prop")
```

Visualizing the observed statistic alongside the null distribution,

```{r}
visualize(null_dist) +
  shade_p_value(obs_stat = p_hat, direction = "two-sided")
```

Calculating the p-value from the null distribution and observed statistic,

```{r}
null_dist |>
  get_p_value(obs_stat = p_hat, direction = "two-sided")
```

Note that logical variables will be coerced to factors:

```{r}
null_dist <- gss |>
  dplyr::mutate(is_female = (sex == "female")) |>
  specify(response = is_female, success = "TRUE") |>
  hypothesize(null = "point", p = .5) |>
  generate(reps = 1000) |>
  calculate(stat = "prop")
```

### One categorical variable (standardized proportion $z$)

Calculating the observed statistic,

```{r}
p_hat <- gss |>
  specify(response = sex, success = "female") |>
  hypothesize(null = "point", p = .5) |>
  calculate(stat = "z")
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
p_hat <- gss |>
  observe(response = sex, success = "female", null = "point", p = .5, stat = "z")
```

Then, generating the null distribution,

```{r}
null_dist <- gss |>
  specify(response = sex, success = "female") |>
  hypothesize(null = "point", p = .5) |>
  generate(reps = 1000, type = "draw") |>
  calculate(stat = "z")
```

Visualizing the observed statistic alongside the null distribution,

```{r}
visualize(null_dist) +
  shade_p_value(obs_stat = p_hat, direction = "two-sided")
```

Calculating the p-value from the null distribution and observed statistic,

```{r}
null_dist |>
  get_p_value(obs_stat = p_hat, direction = "two-sided")
```

The package also supplies a wrapper around `prop.test()` for tests of a single proportion on tidy data.

```{r}
#| label: prop_test_1_grp
prop_test(gss,
          college ~ NULL,
          p = .2)
```

infer does not support testing two means via the `z` distribution.

### Two categorical (2 level) variables

The `infer` package provides several statistics to work with data of this type. One of them is the statistic for difference in proportions.

Calculating the observed statistic,

```{r}
d_hat <- gss |> 
  specify(college ~ sex, success = "no degree") |>
  calculate(stat = "diff in props", order = c("female", "male"))
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
d_hat <- gss |>
  observe(
    college ~ sex,
    success = "no degree",
    stat = "diff in props", order = c("female", "male")
  )
```

Then, generating the null distribution,

```{r}
null_dist <- gss |>
  specify(college ~ sex, success = "no degree") |>
  hypothesize(null = "independence") |>
  generate(reps = 1000) |>
  calculate(stat = "diff in props", order = c("female", "male"))
```

Visualizing the observed statistic alongside the null distribution,

```{r}
visualize(null_dist) +
  shade_p_value(obs_stat = d_hat, direction = "two-sided")
```

Calculating the p-value from the null distribution and observed statistic,

```{r}
null_dist |>
  get_p_value(obs_stat = d_hat, direction = "two-sided")
```

infer also provides functionality to calculate ratios of proportions. The workflow looks similar to that for `diff in props`.

Calculating the observed statistic,

```{r}
r_hat <- gss |> 
  specify(college ~ sex, success = "no degree") |>
  calculate(stat = "ratio of props", order = c("female", "male"))
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
r_hat <- gss |> 
  observe(college ~ sex, success = "no degree",
          stat = "ratio of props", order = c("female", "male"))
```

Then, generating the null distribution,

```{r}
null_dist <- gss |>
  specify(college ~ sex, success = "no degree") |>
  hypothesize(null = "independence") |> 
  generate(reps = 1000) |> 
  calculate(stat = "ratio of props", order = c("female", "male"))
```

Visualizing the observed statistic alongside the null distribution,

```{r}
visualize(null_dist) +
  shade_p_value(obs_stat = r_hat, direction = "two-sided")
```

Calculating the p-value from the null distribution and observed statistic,

```{r}
null_dist |>
  get_p_value(obs_stat = r_hat, direction = "two-sided")
```

In addition, the package provides functionality to calculate odds ratios. The workflow also looks similar to that for `diff in props`.

Calculating the observed statistic,

```{r}
or_hat <- gss |> 
  specify(college ~ sex, success = "no degree") |>
  calculate(stat = "odds ratio", order = c("female", "male"))
```

Then, generating the null distribution,

```{r}
null_dist <- gss |>
  specify(college ~ sex, success = "no degree") |>
  hypothesize(null = "independence") |> 
  generate(reps = 1000) |> 
  calculate(stat = "odds ratio", order = c("female", "male"))
```

Visualizing the observed statistic alongside the null distribution,

```{r}
visualize(null_dist) +
  shade_p_value(obs_stat = or_hat, direction = "two-sided")
```

Calculating the p-value from the null distribution and observed statistic,

```{r}
null_dist |>
  get_p_value(obs_stat = or_hat, direction = "two-sided")
```

### Two categorical (2 level) variables (z)

Finding the standardized observed statistic,

```{r}
z_hat <- gss |> 
  specify(college ~ sex, success = "no degree") |>
  hypothesize(null = "independence") |>
  calculate(stat = "z", order = c("female", "male"))
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
z_hat <- gss |> 
  observe(college ~ sex, success = "no degree",
          stat = "z", order = c("female", "male"))
```

Then, generating the null distribution,

```{r}
null_dist <- gss |>
  specify(college ~ sex, success = "no degree") |>
  hypothesize(null = "independence") |> 
  generate(reps = 1000) |> 
  calculate(stat = "z", order = c("female", "male"))
```

Alternatively, finding the null distribution using theoretical methods using the `assume()` verb,

```{r}
null_dist_theory <- gss |>
  specify(college ~ sex, success = "no degree") |>
  assume("z")
```

Visualizing the observed statistic alongside the null distribution,

```{r}
visualize(null_dist) +
  shade_p_value(obs_stat = z_hat, direction = "two-sided")
```

Alternatively, visualizing the observed statistic using the theory-based null distribution,

```{r}
visualize(null_dist_theory) +
  shade_p_value(obs_stat = z_hat, direction = "two-sided")
```

Alternatively, visualizing the observed statistic using both of the null distributions,

```{r}
visualize(null_dist, method = "both") +
  shade_p_value(obs_stat = z_hat, direction = "two-sided")
```

Note that the above code makes use of the randomization-based null distribution.

Calculating the p-value from the null distribution and observed statistic,

```{r}
null_dist |>
  get_p_value(obs_stat = z_hat, direction = "two-sided")
```

Note the similarities in this plot and the previous one.

The package also supplies a wrapper around `prop.test` to allow for tests of equality of proportions on tidy data.

```{r}
#| label: prop_test_2_grp
prop_test(gss, 
          college ~ sex,  
          order = c("female", "male"))
```

### One categorical (\>2 level) - GoF

Calculating the observed statistic,

Note the need to add in the hypothesized values here to compute the observed statistic.

```{r}
Chisq_hat <- gss |>
  specify(response = finrela) |>
  hypothesize(
    null = "point",
    p = c(
      "far below average" = 1 / 6,
      "below average" = 1 / 6,
      "average" = 1 / 6,
      "above average" = 1 / 6,
      "far above average" = 1 / 6,
      "DK" = 1 / 6
    )
  ) |>
  calculate(stat = "Chisq")
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
Chisq_hat <- gss |>
  observe(
    response = finrela,
    null = "point",
    p = c(
      "far below average" = 1 / 6,
      "below average" = 1 / 6,
      "average" = 1 / 6,
      "above average" = 1 / 6,
      "far above average" = 1 / 6,
      "DK" = 1 / 6
    ),
    stat = "Chisq"
  )
```

Then, generating the null distribution,

```{r}
null_dist <- gss |>
  specify(response = finrela) |>
  hypothesize(
    null = "point",
    p = c(
      "far below average" = 1 / 6,
      "below average" = 1 / 6,
      "average" = 1 / 6,
      "above average" = 1 / 6,
      "far above average" = 1 / 6,
      "DK" = 1 / 6
    )
  ) |>
  generate(reps = 1000, type = "draw") |>
  calculate(stat = "Chisq")
```

Alternatively, finding the null distribution using theoretical methods using the `assume()` verb,

```{r}
null_dist_theory <- gss |>
  specify(response = finrela) |>
  assume("Chisq")
```

Visualizing the observed statistic alongside the null distribution,

```{r}
visualize(null_dist) +
  shade_p_value(obs_stat = Chisq_hat, direction = "greater")
```

Alternatively, visualizing the observed statistic using the theory-based null distribution,

```{r}
visualize(null_dist_theory) +
  shade_p_value(obs_stat = Chisq_hat, direction = "greater")
```

Alternatively, visualizing the observed statistic using both of the null distributions,

```{r}
visualize(null_dist_theory, method = "both") +
  shade_p_value(obs_stat = Chisq_hat, direction = "greater")
```

Note that the above code makes use of the randomization-based null distribution.

Calculating the p-value from the null distribution and observed statistic,

```{r}
null_dist |>
  get_p_value(obs_stat = Chisq_hat, direction = "greater")
```

Alternatively, using the `chisq_test` wrapper:

```{r}
chisq_test(
  gss,
  response = finrela,
  p = c(
    "far below average" = 1 / 6,
    "below average" = 1 / 6,
    "average" = 1 / 6,
    "above average" = 1 / 6,
    "far above average" = 1 / 6,
    "DK" = 1 / 6
  )
)
```

### Two categorical (\>2 level): Chi-squared test of independence

Calculating the observed statistic,

```{r}
Chisq_hat <- gss |>
  specify(formula = finrela ~ sex) |> 
  hypothesize(null = "independence") |>
  calculate(stat = "Chisq")
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
Chisq_hat <- gss |>
  observe(formula = finrela ~ sex, stat = "Chisq")
```

Then, generating the null distribution,

```{r}
null_dist <- gss |>
  specify(finrela ~ sex) |>
  hypothesize(null = "independence") |> 
  generate(reps = 1000, type = "permute") |> 
  calculate(stat = "Chisq")
```

Alternatively, finding the null distribution using theoretical methods using the `assume()` verb,

```{r}
null_dist_theory <- gss |>
  specify(finrela ~ sex) |>
  assume(distribution = "Chisq")
```

Visualizing the observed statistic alongside the null distribution,

```{r}
visualize(null_dist) +
  shade_p_value(obs_stat = Chisq_hat, direction = "greater")
```

Alternatively, visualizing the observed statistic using the theory-based null distribution,

```{r}
visualize(null_dist_theory) +
  shade_p_value(obs_stat = Chisq_hat, direction = "greater")
```

Alternatively, visualizing the observed statistic using both of the null distributions,

```{r}
visualize(null_dist, method = "both") +
  shade_p_value(obs_stat = Chisq_hat, direction = "greater")
```

Note that the above code makes use of the randomization-based null distribution.

Calculating the p-value from the null distribution and observed statistic,

```{r}
null_dist |>
  get_p_value(obs_stat = Chisq_hat, direction = "greater")
```

Alternatively, using the wrapper to carry out the test,

```{r}
gss |>
  chisq_test(formula = finrela ~ sex)
```

### One numerical variable, one categorical (2 levels) (diff in means)

Calculating the observed statistic,

```{r}
d_hat <- gss |> 
  specify(age ~ college) |> 
  calculate(stat = "diff in means", order = c("degree", "no degree"))
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
d_hat <- gss |> 
  observe(age ~ college,
          stat = "diff in means", order = c("degree", "no degree"))
```

Then, generating the null distribution,

```{r}
null_dist <- gss |>
  specify(age ~ college) |>
  hypothesize(null = "independence") |>
  generate(reps = 1000, type = "permute") |>
  calculate(stat = "diff in means", order = c("degree", "no degree"))
```

Visualizing the observed statistic alongside the null distribution,

```{r}
visualize(null_dist) +
  shade_p_value(obs_stat = d_hat, direction = "two-sided")
```

Calculating the p-value from the null distribution and observed statistic,

```{r}
null_dist |>
  get_p_value(obs_stat = d_hat, direction = "two-sided")
```

### One numerical variable, one categorical (2 levels) (t)

Finding the standardized observed statistic,

```{r}
t_hat <- gss |> 
  specify(age ~ college) |> 
  hypothesize(null = "independence") |>
  calculate(stat = "t", order = c("degree", "no degree"))
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
t_hat <- gss |> 
  observe(age ~ college,
          stat = "t", order = c("degree", "no degree"))
```

Then, generating the null distribution,

```{r}
null_dist <- gss |>
  specify(age ~ college) |>
  hypothesize(null = "independence") |>
  generate(reps = 1000, type = "permute") |>
  calculate(stat = "t", order = c("degree", "no degree"))
```

Alternatively, finding the null distribution using theoretical methods using the `assume()` verb,

```{r}
null_dist_theory <- gss |>
  specify(age ~ college) |>
  assume("t")
```

Visualizing the observed statistic alongside the null distribution,

```{r}
visualize(null_dist) +
  shade_p_value(obs_stat = t_hat, direction = "two-sided")
```

Alternatively, visualizing the observed statistic using the theory-based null distribution,

```{r}
visualize(null_dist_theory) +
  shade_p_value(obs_stat = t_hat, direction = "two-sided")
```

Alternatively, visualizing the observed statistic using both of the null distributions,

```{r}
visualize(null_dist, method = "both") +
  shade_p_value(obs_stat = t_hat, direction = "two-sided")
```

Note that the above code makes use of the randomization-based null distribution.

Calculating the p-value from the null distribution and observed statistic,

```{r}
null_dist |>
  get_p_value(obs_stat = t_hat, direction = "two-sided")
```

Note the similarities in this plot and the previous one.

### One numerical variable, one categorical (2 levels) (diff in medians)

Calculating the observed statistic,

```{r}
d_hat <- gss |> 
  specify(age ~ college) |> 
  calculate(stat = "diff in medians", order = c("degree", "no degree"))
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
d_hat <- gss |> 
  observe(age ~ college,
          stat = "diff in medians", order = c("degree", "no degree"))
```

Then, generating the null distribution,

```{r}
null_dist <- gss |>
  specify(age ~ college) |> # alt: response = age, explanatory = season
  hypothesize(null = "independence") |>
  generate(reps = 1000, type = "permute") |>
  calculate(stat = "diff in medians", order = c("degree", "no degree"))
```

Visualizing the observed statistic alongside the null distribution,

```{r}
visualize(null_dist) +
  shade_p_value(obs_stat = d_hat, direction = "two-sided")
```

Calculating the p-value from the null distribution and observed statistic,

```{r}
null_dist |>
  get_p_value(obs_stat = d_hat, direction = "two-sided")
```

### One numerical, one categorical (\>2 levels) - ANOVA

Calculating the observed statistic,

```{r}
F_hat <- gss |> 
  specify(age ~ partyid) |>
  calculate(stat = "F")
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
F_hat <- gss |> 
  observe(age ~ partyid, stat = "F")
```

Then, generating the null distribution,

```{r}
null_dist <- gss |>
   specify(age ~ partyid) |>
   hypothesize(null = "independence") |>
   generate(reps = 1000, type = "permute") |>
   calculate(stat = "F")
```

Alternatively, finding the null distribution using theoretical methods using the `assume()` verb,

```{r}
null_dist_theory <- gss |>
   specify(age ~ partyid) |>
   hypothesize(null = "independence") |>
   assume(distribution = "F")
```

Visualizing the observed statistic alongside the null distribution,

```{r}
visualize(null_dist) +
  shade_p_value(obs_stat = F_hat, direction = "greater")
```

Alternatively, visualizing the observed statistic using the theory-based null distribution,

```{r}
visualize(null_dist_theory) +
  shade_p_value(obs_stat = F_hat, direction = "greater")
```

Alternatively, visualizing the observed statistic using both of the null distributions,

```{r}
visualize(null_dist, method = "both") +
  shade_p_value(obs_stat = F_hat, direction = "greater")
```

Note that the above code makes use of the randomization-based null distribution.

Calculating the p-value from the null distribution and observed statistic,

```{r}
null_dist |>
  get_p_value(obs_stat = F_hat, direction = "greater")
```

### Two numerical vars - SLR

Calculating the observed statistic,

```{r}
slope_hat <- gss |> 
  specify(hours ~ age) |> 
  calculate(stat = "slope")
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
slope_hat <- gss |> 
  observe(hours ~ age, stat = "slope")
```

Then, generating the null distribution,

```{r}
null_dist <- gss |>
   specify(hours ~ age) |> 
   hypothesize(null = "independence") |>
   generate(reps = 1000, type = "permute") |>
   calculate(stat = "slope")
```

Visualizing the observed statistic alongside the null distribution,

```{r}
visualize(null_dist) +
  shade_p_value(obs_stat = slope_hat, direction = "two-sided")
```

Calculating the p-value from the null distribution and observed statistic,

```{r}
null_dist |>
  get_p_value(obs_stat = slope_hat, direction = "two-sided")
```

### Two numerical vars - correlation

Calculating the observed statistic,

```{r}
correlation_hat <- gss |> 
  specify(hours ~ age) |> 
  calculate(stat = "correlation")
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
correlation_hat <- gss |> 
  observe(hours ~ age, stat = "correlation")
```

Then, generating the null distribution,

```{r}
null_dist <- gss |>
   specify(hours ~ age) |> 
   hypothesize(null = "independence") |>
   generate(reps = 1000, type = "permute") |>
   calculate(stat = "correlation")
```

Visualizing the observed statistic alongside the null distribution,

```{r}
visualize(null_dist) +
  shade_p_value(obs_stat = correlation_hat, direction = "two-sided")
```

Calculating the p-value from the null distribution and observed statistic,

```{r}
null_dist |>
  get_p_value(obs_stat = correlation_hat, direction = "two-sided")
```

### Two numerical vars - SLR (t)

Not currently implemented since $t$ could refer to standardized slope or standardized correlation.

### Multiple explanatory variables

Calculating the observed fit,

```{r}
obs_fit <- gss |>
  specify(hours ~ age + college) |>
  fit()
```

Generating a distribution of fits with the response variable permuted,

```{r}
null_dist <- gss |>
  specify(hours ~ age + college) |>
  hypothesize(null = "independence") |>
  generate(reps = 1000, type = "permute") |>
  fit()
```

Generating a distribution of fits where each explanatory variable is permuted independently,

```{r}
null_dist2 <- gss |>
  specify(hours ~ age + college) |>
  hypothesize(null = "independence") |>
  generate(reps = 1000, type = "permute", variables = c(age, college)) |>
  fit()
```

Visualizing the observed fit alongside the null fits,

```{r}
visualize(null_dist) +
  shade_p_value(obs_stat = obs_fit, direction = "two-sided")
```

Calculating p-values from the null distribution and observed fit,

```{r}
null_dist |>
  get_p_value(obs_stat = obs_fit, direction = "two-sided")
```

Note that this `fit()`-based workflow can be applied to use cases with differing numbers of explanatory variables and explanatory variable types.

## Confidence intervals

### One numerical (one mean)

Finding the observed statistic,

```{r}
x_bar <- gss |> 
  specify(response = hours) |>
  calculate(stat = "mean")
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
x_bar <- gss |> 
  observe(response = hours, stat = "mean")
```

Then, generating a bootstrap distribution,

```{r}
boot_dist <- gss |>
   specify(response = hours) |>
   generate(reps = 1000, type = "bootstrap") |>
   calculate(stat = "mean")
```

Use the bootstrap distribution to find a confidence interval,

```{r}
percentile_ci <- get_ci(boot_dist)
```

Visualizing the observed statistic alongside the distribution,

```{r}
visualize(boot_dist) +
  shade_confidence_interval(endpoints = percentile_ci)
```

Alternatively, use the bootstrap distribution to find a confidence interval using the standard error,

```{r}
standard_error_ci <- get_ci(boot_dist, type = "se", point_estimate = x_bar)

visualize(boot_dist) +
  shade_confidence_interval(endpoints = standard_error_ci)
```

Instead of a simulation-based bootstrap distribution, we can also define a theory-based sampling distribution,

```{r}
sampling_dist <- gss |>
   specify(response = hours) |>
   assume(distribution = "t")
```

Visualization and calculation of confidence intervals interfaces in the same way as with the simulation-based distribution,

```{r}
theor_ci <- get_ci(sampling_dist, point_estimate = x_bar)

theor_ci

visualize(sampling_dist) +
  shade_confidence_interval(endpoints = theor_ci)
```

Note that the `t` distribution is recentered and rescaled to lie on the scale of the observed data. infer does not support confidence intervals on means via the `z` distribution.

### One numerical (one mean - standardized)

Finding the observed statistic,

```{r}
t_hat <- gss |> 
  specify(response = hours) |>
  hypothesize(null = "point", mu = 40) |>
  calculate(stat = "t")
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
t_hat <- gss |> 
  observe(response = hours,
          null = "point", mu = 40,
          stat = "t")
```

Then, generating the bootstrap distribution,

```{r}
boot_dist <- gss |>
   specify(response = hours) |>
   generate(reps = 1000, type = "bootstrap") |>
   calculate(stat = "t")
```

Use the bootstrap distribution to find a confidence interval,

```{r}
percentile_ci <- get_ci(boot_dist)
```

Visualizing the observed statistic alongside the distribution,

```{r}
visualize(boot_dist) +
  shade_confidence_interval(endpoints = percentile_ci)
```

Alternatively, use the bootstrap distribution to find a confidence interval using the standard error,

```{r}
standard_error_ci <- boot_dist |>
  get_ci(type = "se", point_estimate = t_hat)

visualize(boot_dist) +
  shade_confidence_interval(endpoints = standard_error_ci)
```

See the above subsection (one mean) for a theory-based approach. Note that infer does not support confidence intervals on means via the `z` distribution.

### One categorical (one proportion)

Finding the observed statistic,

```{r}
p_hat <- gss |> 
   specify(response = sex, success = "female") |>
   calculate(stat = "prop")
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
p_hat <- gss |> 
   observe(response = sex, success = "female", stat = "prop")
```

Then, generating a bootstrap distribution,

```{r}
boot_dist <- gss |>
 specify(response = sex, success = "female") |>
 generate(reps = 1000, type = "bootstrap") |>
 calculate(stat = "prop")
```

Use the bootstrap distribution to find a confidence interval,

```{r}
percentile_ci <- get_ci(boot_dist)
```

Visualizing the observed statistic alongside the distribution,

```{r}
visualize(boot_dist) +
  shade_confidence_interval(endpoints = percentile_ci)
```

Alternatively, use the bootstrap distribution to find a confidence interval using the standard error,

```{r}
standard_error_ci <- boot_dist |>
  get_ci(type = "se", point_estimate = p_hat)

visualize(boot_dist) +
  shade_confidence_interval(endpoints = standard_error_ci)
```

Instead of a simulation-based bootstrap distribution, we can also define a theory-based sampling distribution,

```{r}
sampling_dist <- gss |>
   specify(response = sex, success = "female") |>
   assume(distribution = "z")
```

Visualization and calculation of confidence intervals interfaces in the same way as with the simulation-based distribution,

```{r}
theor_ci <- get_ci(sampling_dist, point_estimate = p_hat)

theor_ci

visualize(sampling_dist) +
  shade_confidence_interval(endpoints = theor_ci)
```

Note that the `z` distribution is recentered and rescaled to lie on the scale of the observed data. `infer` does not support confidence intervals on means via the `z` distribution.

### One categorical variable (standardized proportion $z$)

See the above subsection (one proportion) for a theory-based approach.

### One numerical variable, one categorical (2 levels) (diff in means)

Finding the observed statistic,

```{r}
d_hat <- gss |>
  specify(hours ~ college) |>
  calculate(stat = "diff in means", order = c("degree", "no degree"))
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
d_hat <- gss |>
  observe(hours ~ college,
          stat = "diff in means", order = c("degree", "no degree"))
```

Then, generating a bootstrap distribution,

```{r}
boot_dist <- gss |>
   specify(hours ~ college) |>
   generate(reps = 1000, type = "bootstrap") |>
   calculate(stat = "diff in means", order = c("degree", "no degree"))
```

Use the bootstrap distribution to find a confidence interval,

```{r}
percentile_ci <- get_ci(boot_dist)
```

Visualizing the observed statistic alongside the distribution,

```{r}
visualize(boot_dist) +
  shade_confidence_interval(endpoints = percentile_ci)
```

Alternatively, use the bootstrap distribution to find a confidence interval using the standard error,

```{r}
standard_error_ci <- boot_dist |>
  get_ci(type = "se", point_estimate = d_hat)

visualize(boot_dist) +
  shade_confidence_interval(endpoints = standard_error_ci)
```

Instead of a simulation-based bootstrap distribution, we can also define a theory-based sampling distribution,

```{r}
sampling_dist <- gss |>
   specify(hours ~ college) |>
   assume(distribution = "t")
```

Visualization and calculation of confidence intervals interfaces in the same way as with the simulation-based distribution,

```{r}
theor_ci <- get_ci(sampling_dist, point_estimate = d_hat)

theor_ci

visualize(sampling_dist) +
  shade_confidence_interval(endpoints = theor_ci)
```

Note that the `t` distribution is recentered and rescaled to lie on the scale of the observed data.

`infer` also provides functionality to calculate ratios of means. The workflow looks similar to that for `diff in means`.

Finding the observed statistic,

```{r}
d_hat <- gss |>
  specify(hours ~ college) |>
  calculate(stat = "ratio of means", order = c("degree", "no degree"))
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
d_hat <- gss |>
  observe(hours ~ college,
          stat = "ratio of means", order = c("degree", "no degree"))
```

Then, generating a bootstrap distribution,

```{r}
boot_dist <- gss |>
   specify(hours ~ college) |>
   generate(reps = 1000, type = "bootstrap") |>
   calculate(stat = "ratio of means", order = c("degree", "no degree"))
```

Use the bootstrap distribution to find a confidence interval,

```{r}
percentile_ci <- get_ci(boot_dist)
```

Visualizing the observed statistic alongside the distribution,

```{r}
visualize(boot_dist) +
  shade_confidence_interval(endpoints = percentile_ci)
```

Alternatively, use the bootstrap distribution to find a confidence interval using the standard error,

```{r}
standard_error_ci <- boot_dist |>
  get_ci(type = "se", point_estimate = d_hat)

visualize(boot_dist) +
  shade_confidence_interval(endpoints = standard_error_ci)
```

### One numerical variable, one categorical (2 levels) (t)

Finding the standardized point estimate,

```{r}
t_hat <- gss |>
  specify(hours ~ college) |>
  calculate(stat = "t", order = c("degree", "no degree"))
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
t_hat <- gss |>
  observe(hours ~ college,
          stat = "t", order = c("degree", "no degree"))
```

Then, generating a bootstrap distribution,

```{r}
boot_dist <- gss |>
   specify(hours ~ college) |>
   generate(reps = 1000, type = "bootstrap") |>
   calculate(stat = "t", order = c("degree", "no degree"))
```

Use the bootstrap distribution to find a confidence interval,

```{r}
percentile_ci <- get_ci(boot_dist)
```

Visualizing the observed statistic alongside the distribution,

```{r}
visualize(boot_dist) +
  shade_confidence_interval(endpoints = percentile_ci)
```

Alternatively, use the bootstrap distribution to find a confidence interval using the standard error,

```{r}
standard_error_ci <- boot_dist |>
  get_ci(type = "se", point_estimate = t_hat)

visualize(boot_dist) +
  shade_confidence_interval(endpoints = standard_error_ci)
```

See the above subsection (diff in means) for a theory-based approach. `infer` does not support confidence intervals on means via the `z` distribution.

### Two categorical variables (diff in proportions)

Finding the observed statistic,

```{r}
d_hat <- gss |> 
  specify(college ~ sex, success = "degree") |>
  calculate(stat = "diff in props", order = c("female", "male"))
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
d_hat <- gss |> 
  observe(college ~ sex, success = "degree",
          stat = "diff in props", order = c("female", "male"))
```

Then, generating a bootstrap distribution,

```{r}
boot_dist <- gss |>
  specify(college ~ sex, success = "degree") |>
  generate(reps = 1000, type = "bootstrap") |> 
  calculate(stat = "diff in props", order = c("female", "male"))
```

Use the bootstrap distribution to find a confidence interval,

```{r}
percentile_ci <- get_ci(boot_dist)
```

Visualizing the observed statistic alongside the distribution,

```{r}
visualize(boot_dist) +
  shade_confidence_interval(endpoints = percentile_ci)
```

Alternatively, use the bootstrap distribution to find a confidence interval using the standard error,

```{r}
standard_error_ci <- boot_dist |>
  get_ci(type = "se", point_estimate = d_hat)

visualize(boot_dist) +
  shade_confidence_interval(endpoints = standard_error_ci)
```

Instead of a simulation-based bootstrap distribution, we can also define a theory-based sampling distribution,

```{r}
sampling_dist <- gss |> 
  specify(college ~ sex, success = "degree") |>
   assume(distribution = "z")
```

Visualization and calculation of confidence intervals interfaces in the same way as with the simulation-based distribution,

```{r}
theor_ci <- get_ci(sampling_dist, point_estimate = d_hat)

theor_ci

visualize(sampling_dist) +
  shade_confidence_interval(endpoints = theor_ci)
```

Note that the `z` distribution is recentered and rescaled to lie on the scale of the observed data.

### Two categorical variables (z)

Finding the standardized point estimate,

```{r}
z_hat <- gss |> 
  specify(college ~ sex, success = "degree") |>
  calculate(stat = "z", order = c("female", "male"))
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
z_hat <- gss |> 
  observe(college ~ sex, success = "degree",
          stat = "z", order = c("female", "male"))
```

Then, generating a bootstrap distribution,

```{r}
boot_dist <- gss |>
  specify(college ~ sex, success = "degree") |>
  generate(reps = 1000, type = "bootstrap") |> 
  calculate(stat = "z", order = c("female", "male"))
```

Use the bootstrap distribution to find a confidence interval,

```{r}
percentile_ci <- get_ci(boot_dist)
```

Visualizing the observed statistic alongside the distribution,

```{r}
visualize(boot_dist) +
  shade_confidence_interval(endpoints = percentile_ci)
```

Alternatively, use the bootstrap distribution to find a confidence interval using the standard error,

```{r}
standard_error_ci <- boot_dist |>
  get_ci(type = "se", point_estimate = z_hat)

visualize(boot_dist) +
  shade_confidence_interval(endpoints = standard_error_ci)
```

See the above subsection (diff in props) for a theory-based approach.

### Two numerical vars - SLR

Finding the observed statistic,

```{r}
slope_hat <- gss |> 
  specify(hours ~ age) |>
  calculate(stat = "slope")
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
slope_hat <- gss |> 
  observe(hours ~ age, stat = "slope")
```

Then, generating a bootstrap distribution,

```{r}
boot_dist <- gss |>
   specify(hours ~ age) |> 
   generate(reps = 1000, type = "bootstrap") |>
   calculate(stat = "slope")
```

Use the bootstrap distribution to find a confidence interval,

```{r}
percentile_ci <- get_ci(boot_dist)
```

Visualizing the observed statistic alongside the distribution,

```{r}
visualize(boot_dist) +
  shade_confidence_interval(endpoints = percentile_ci)
```

Alternatively, use the bootstrap distribution to find a confidence interval using the standard error,

```{r}
standard_error_ci <- boot_dist |>
  get_ci(type = "se", point_estimate = slope_hat)

visualize(boot_dist) +
  shade_confidence_interval(endpoints = standard_error_ci)
```

### Two numerical vars - correlation

Finding the observed statistic,

```{r}
correlation_hat <- gss |> 
  specify(hours ~ age) |>
  calculate(stat = "correlation")
```

Alternatively, using the `observe()` wrapper to calculate the observed statistic,

```{r}
correlation_hat <- gss |> 
  observe(hours ~ age, stat = "correlation")
```

Then, generating a bootstrap distribution,

```{r}
boot_dist <- gss |>
   specify(hours ~ age) |> 
   generate(reps = 1000, type = "bootstrap") |>
   calculate(stat = "correlation")
```

Use the bootstrap distribution to find a confidence interval,

```{r}
percentile_ci <- get_ci(boot_dist)
```

Visualizing the observed statistic alongside the distribution,

```{r}
visualize(boot_dist) +
  shade_confidence_interval(endpoints = percentile_ci)
```

Alternatively, use the bootstrap distribution to find a confidence interval using the standard error,

```{r}
standard_error_ci <- boot_dist |>
  get_ci(type = "se", point_estimate = correlation_hat)

visualize(boot_dist) +
  shade_confidence_interval(endpoints = standard_error_ci)
```

### Two numerical vars - t

Not currently implemented since $t$ could refer to standardized slope or standardized correlation.

### Multiple explanatory variables

Calculating the observed fit,

```{r}
obs_fit <- gss |>
  specify(hours ~ age + college) |>
  fit()
```

Then, generating a bootstrap distribution,

```{r}
boot_dist <- gss |>
  specify(hours ~ age + college) |>
  generate(reps = 1000, type = "bootstrap") |>
  fit()
```

Use the bootstrap distribution to find a confidence interval,

```{r}
conf_ints <- 
  get_confidence_interval(
    boot_dist, 
    level = .95, 
    point_estimate = obs_fit
  )
```

Visualizing the observed statistic alongside the distribution,

```{r}
visualize(boot_dist) +
  shade_confidence_interval(endpoints = conf_ints)
```

Note that this `fit()`-based workflow can be applied to use cases with differing numbers of explanatory variables and explanatory variable types.


================================================
FILE: vignettes/paired.Rmd
================================================
---
title: "Tidy inference for paired data"
description: "Conducting tests for paired independence on tidy data with infer."
output: rmarkdown::html_vignette
vignette: |
  %\VignetteIndexEntry{Tidy inference for paired data}
  %\VignetteEngine{knitr::rmarkdown}
  \usepackage[utf8]{inputenc}
---

```{r}
#| label: settings
#| include: false
knitr::opts_chunk$set(fig.width = 6, fig.height = 4.5) 
options(digits = 4)
```

```{r}
#| label: load-packages
#| echo: false
#| message: false
#| warning: false
library(ggplot2)
library(dplyr)
library(infer)
```

### Introduction

In this vignette, we'll walk through conducting a randomization-based paired test of independence with infer.

Throughout this vignette, we'll make use of the `gss` dataset supplied by infer, which contains a sample of data from the General Social Survey. See `?gss` for more information on the variables included and their source. Note that this data (and our examples on it) are for demonstration purposes only, and will not necessarily provide accurate estimates unless weighted properly. For these examples, let's suppose that this dataset is a representative sample of a population we want to learn about: American adults. The data looks like this:

```{r}
#| label: glimpse-gss-actual
#| warning: false
#| message: false
dplyr::glimpse(gss)
```

Two sets of observations are paired if each observation in one column has a special correspondence or connection with exactly one observation in the other. For the purposes of this vignette, we'll simulate an additional data variable with a natural pairing: suppose that each of these survey respondents had provided the number of `hours` worked per week when surveyed 5 years prior, encoded as `hours_previous`.

```{r}
set.seed(1)

gss_paired <- gss |>
   mutate(
      hours_previous = hours + 5 - rpois(nrow(gss), 4.8),
      diff = hours - hours_previous
   )

gss_paired |>
   select(hours, hours_previous, diff)
```

The number of `hours` worked per week by a particular respondent has a special correspondence with the number of hours worked 5 years prior `hours_previous` by that same respondent. We'd like to test the null hypothesis that the `"mean"` hours worked per week did not change between the sampled time and five years prior.

To carry out inference on paired data with infer, we pre-compute the difference between paired values at the beginning of the analysis, and use those differences as our values of interest.

Here, we pre-compute the difference between paired observations as `diff`. The distribution of `diff` in the observed data looks like this:

```{r}
#| label: plot-diff
#| echo: false
unique_diff <- unique(gss_paired$diff)
gss_paired |>
  ggplot2::ggplot() +
  ggplot2::aes(x = diff) +
  ggplot2::geom_histogram(bins = diff(range(unique_diff))) +
  ggplot2::labs(
    x = "diff: Difference in Number of Hours Worked",
    y = "Number of Responses"
  ) +
  ggplot2::scale_x_continuous(breaks = c(range(unique_diff), 0))
```

From the looks of the distribution, most respondents worked a similar number of hours worked per week as they had 5 hours prior, though it seems like there may be a slight decline of number of hours worked per week in aggregate. (We know that the true effect is -.2 since we've simulated this data.)

We calculate the observed statistic in the paired setting in the same way that we would outside of the paired setting. Using `specify()` and `calculate()`:

```{r}
#| label: calc-obs-mean
# calculate the observed statistic
observed_statistic <- 
   gss_paired |> 
   specify(response = diff) |> 
   calculate(stat = "mean")
```

The observed statistic is `r observed_statistic`. Now, we want to compare this statistic to a null distribution, generated under the assumption that the true difference was actually zero, to get a sense of how likely it would be for us to see this observed difference if there were truly no change in hours worked per week in the population.

Tests for paired data are carried out via the `null = "paired independence"` argument to `hypothesize()`.

```{r}
#| label: generate-null
# generate the null distribution
null_dist <- 
   gss_paired |> 
   specify(response = diff) |> 
   hypothesize(null = "paired independence") |>
   generate(reps = 1000, type = "permute") |>
   calculate(stat = "mean")
   
null_dist
```

For each replicate, `generate()` carries out `type = "permute"` with `null = "paired independence"` by:

* Randomly sampling a vector of signs (i.e. -1 or 1), probability .5 for either, with length equal to the input data, and
* Multiplying the response variable by the vector of signs, "flipping" the observed values for a random subset of value in each replicate

To get a sense for what this distribution looks like, and where our observed statistic falls, we can use `visualize()`:

```{r}
#| label: visualize
# visualize the null distribution and test statistic
null_dist |>
  visualize() + 
  shade_p_value(observed_statistic,
                direction = "two-sided")
```

It looks like our observed mean of `r observed_statistic` would be relatively unlikely if there were truly no change in mean number of hours worked per week over this time period.

More exactly, we can calculate the p-value:

```{r}
#| label: p-value
# calculate the p value from the test statistic and null distribution
p_value <- null_dist |>
  get_p_value(obs_stat = observed_statistic,
              direction = "two-sided")

p_value
```

Thus, if the change in mean number of hours worked per week over this time period were truly zero, our approximation of the probability that we would see a test statistic as or more extreme than `r observed_statistic` is approximately `r p_value`.

We can also generate a bootstrap confidence interval for the mean paired difference using `type = "bootstrap"` in `generate()`. As before, we use the pre-computed differences when generating bootstrap resamples:

```{r}
#| label: generate-boot
# generate a bootstrap distribution
boot_dist <- 
   gss_paired |> 
   specify(response = diff) |> 
   hypothesize(null = "paired independence") |>
   generate(reps = 1000, type = "bootstrap") |>
   calculate(stat = "mean")
   
visualize(boot_dist)
```

Note that, unlike the null distribution of test statistics generated earlier with `type = "permute"`, this distribution is centered at `observed_statistic`. 

Calculating a confidence interval:

```{r}
#| label: confidence-interval
# calculate the confidence from the bootstrap distribution
confidence_interval <- boot_dist |>
  get_confidence_interval(level = .95)

confidence_interval
```

By default, `get_confidence_interval()` constructs the lower and upper bounds by taking the observations at the $(1 - .95) / 2$ and $1 - ((1-.95) / 2)$th percentiles. To instead build the confidence interval using the standard error of the bootstrap distribution, we can write:

```{r}
boot_dist |>
  get_confidence_interval(type = "se",
                          point_estimate = observed_statistic,
                          level = .95)
```

To learn more about randomization-based inference for paired observations, see the relevant chapter in [Introduction to Modern Statistics](https://openintro-ims.netlify.app/inference-paired-means.html).


================================================
FILE: vignettes/t_test.Rmd
================================================
---
title: "Tidy t-Tests with infer"
description: "Conducting t-Tests on tidy data with infer."
output: rmarkdown::html_vignette
vignette: |
  %\VignetteIndexEntry{Tidy t-Tests with infer}
  %\VignetteEngine{knitr::rmarkdown}
  \usepackage[utf8]{inputenc}
---

```{r}
#| label: settings
#| include: false
knitr::opts_chunk$set(fig.width = 6, fig.height = 4.5) 
options(digits = 4)
```

```{r}
#| label: load-packages
#| echo: false
#| message: false
#| warning: false
library(ggplot2)
library(dplyr)
library(infer)
```

### Introduction

In this vignette, we'll walk through conducting $t$-tests and their randomization-based analogue using infer. We'll start out with a 1-sample $t$-test, which compares a sample mean to a hypothesized true mean value. Then, we'll discuss 2-sample $t$-tests, testing the difference in means of two populations using a sample of data drawn from them. If you're interested in evaluating whether differences in paired values (e.g. some measure taken of a person before and after an experiment) differ from 0, see `vignette("paired", package = "infer")`.

Throughout this vignette, we'll make use of the `gss` dataset supplied by infer, which contains a sample of data from the General Social Survey. See `?gss` for more information on the variables included and their source. Note that this data (and our examples on it) are for demonstration purposes only, and will not necessarily provide accurate estimates unless weighted properly. For these examples, let's suppose that this dataset is a representative sample of a population we want to learn about: American adults. The data looks like this:

```{r}
#| label: glimpse-gss-actual
#| warning: false
#| message: false
dplyr::glimpse(gss)
```

### 1-Sample t-Test

The 1-sample $t$-test can be used to test whether a sample of continuous data could have plausibly come from a population with a specified mean. 

As an example, we'll test whether the average American adult works 40 hours a week using data from the `gss`. To do so, we make use of the `hours` variable, giving the number of hours that respondents reported having worked in the previous week. The distribution of `hours` in the observed data looks like this:

```{r}
#| label: plot-1-sample
#| echo: false
gss |>
  ggplot2::ggplot() +
  ggplot2::aes(x = hours) +
  ggplot2::geom_histogram(bins = 20) +
  ggplot2::labs(
    x = "hours: Number of Hours Worked",
    y = "Number of Responses"
  ) +
  ggplot2::scale_x_continuous(breaks = seq(0, 90, 10))
```

It looks like most respondents reported having worked 40 hours, but there's quite a bit of variability. Let's test whether we have evidence that the true mean number of hours that Americans work per week is 40.

infer's randomization-based analogue to the 1-sample $t$-test is a 1-sample mean test. We'll start off showcasing that test before demonstrating how to carry out a theory-based $t$-test with the package.

First, to calculate the observed statistic, we can use `specify()` and `calculate()`.

```{r}
#| label: calc-obs-stat-1-sample
#| warning: false
#| message: false
# calculate the observed statistic
observed_statistic <- gss |>
  specify(response = hours) |>
  calculate(stat = "mean")
```

The observed statistic is `r observed_statistic`. Now, we want to compare this statistic to a null distribution, generated under the assumption that the mean was actually 40, to get a sense of how likely it would be for us to see this observed mean if the true number of hours worked per week in the population was really 40.

We can `generate()` the null distribution using the bootstrap. In the bootstrap, for each replicate, a sample of size equal to the input sample size is drawn (with replacement) from the input sample data. This allows us to get a sense of how much variability we'd expect to see in the entire population so that we can then understand how unlikely our sample mean would be.

```{r}
#| label: generate-null-1-sample
#| warning: false
#| message: false
# generate the null distribution
null_dist_1_sample <- gss |>
  specify(response = hours) |>
  hypothesize(null = "point", mu = 40) |>
  generate(reps = 1000, type = "bootstrap") |>
  calculate(stat = "mean")
```

To get a sense for what these distributions look like, and where our observed statistic falls, we can use `visualize()`:

```{r}
#| label: visualize-1-sample
#| warning: false
#| message: false
# visualize the null distribution and test statistic!
null_dist_1_sample |>
  visualize() +
  shade_p_value(observed_statistic,
    direction = "two-sided"
  )
```

It looks like our observed mean of `r observed_statistic` would be relatively unlikely if the true mean was actually 40 hours a week. More exactly, we can calculate the p-value:

```{r}
#| label: p-value-1-sample
#| warning: false
#| message: false
# calculate the p value from the test statistic and null distribution
p_value_1_sample <- null_dist_1_sample |>
  get_p_value(obs_stat = observed_statistic,
              direction = "two-sided")

p_value_1_sample
```

Thus, if the true mean number of hours worked per week was really 40, our approximation of the probability that we would see a test statistic as or more extreme than `r observed_statistic` is approximately `r p_value_1_sample`.

Analogously to the steps shown above, the package supplies a wrapper function, `t_test`, to carry out 1-sample $t$-tests on tidy data. Rather than using randomization, the wrappers carry out the theory-based $t$-test. The syntax looks like this:

```{r}
#| label: t-test-wrapper
#| message: false
#| warning: false
t_test(gss, response = hours, mu = 40)
```

An alternative approach to the `t_test()` wrapper is to calculate the observed statistic with an infer pipeline and then supply it to the `pt` function from base R. 

```{r}
# calculate the observed statistic
observed_statistic <- gss |>
  specify(response = hours) |>
  hypothesize(null = "point", mu = 40) |>
  calculate(stat = "t") |>
  dplyr::pull()
```

Note that this pipeline to calculate an observed statistic includes a call to `hypothesize()` since the $t$ statistic requires a hypothesized mean value.

Then, juxtaposing that $t$ statistic with its associated distribution using the `pt` function:

```{r}
pt(unname(observed_statistic), df = nrow(gss) - 1, lower.tail = FALSE)*2
```

Note that the resulting $t$-statistics from these two theory-based approaches are the same.

### 2-Sample t-Test

2-Sample $t$-tests evaluate the difference in mean values of two populations using data randomly-sampled from the population that approximately follows a normal distribution. As an example, we'll test if Americans work the same number of hours a week regardless of whether they have a college degree or not using data from the `gss`. The `college` and `hours` variables allow us to do so:

```{r}
#| label: plot-2-sample
#| echo: false
gss |>
  ggplot2::ggplot() +
  ggplot2::aes(x = college, y = hours) +
  ggplot2::geom_boxplot() +
  ggplot2::labs(x = "college: Whether the Respondent has a College Degree",
                y = "hours: Number of Hours Worked")
```

It looks like both of these distributions are centered near 40 hours a week, but the distribution for those with a degree is slightly right skewed.

infer's randomization-based analogue to the 2-sample $t$-test is a difference in means test. We'll start off showcasing that test before demonstrating how to carry out a theory-based $t$-test with the package.

As with the one-sample test, to calculate the observed difference in means, we can use `specify()` and `calculate()`.

```{r}
#| label: calc-obs-stat-2-sample
#| warning: false
#| message: false
# calculate the observed statistic
observed_statistic <- gss |>
  specify(hours ~ college) |>
  calculate(stat = "diff in means", order = c("degree", "no degree"))

observed_statistic
```

Note that, in the line `specify(hours ~ college)`, we could have swapped this out with the syntax `specify(response = hours, explanatory = college)`!

The `order` argument in that `calculate` line gives the order to subtract the mean values in: in our case, we're taking the mean number of hours worked by those with a degree minus the mean number of hours worked by those without a degree; a positive difference, then, would mean that people with degrees worked more than those without a degree.

Now, we want to compare this difference in means to a null distribution, generated under the assumption that the number of hours worked a week has no relationship with whether or not one has a college degree, to get a sense of how likely it would be for us to see this observed difference in means if there were really no relationship between these two variables.

We can `generate()` the null distribution using permutation, where, for each replicate, each value of degree status will be randomly reassigned (without replacement) to a new number of hours worked per week in the sample in order to break any association between the two.

```{r}
#| label: generate-null-2-sample
#| warning: false
#| message: false
# generate the null distribution with randomization
null_dist_2_sample <- gss |>
  specify(hours ~ college) |>
  hypothesize(null = "independence") |>
  generate(reps = 1000, type = "permute") |>
  calculate(stat = "diff in means", order = c("degree", "no degree"))
```

Again, note that, in the lines `specify(hours ~ college)` in the above chunk, we could have used the syntax `specify(response = hours, explanatory = college)` instead!

To get a sense for what these distributions look like, and where our observed statistic falls, we can use `visualize()`.

```{r}
#| label: visualize-2-sample
#| warning: false
#| message: false
# visualize the randomization-based null distribution and test statistic!
null_dist_2_sample |>
  visualize() + 
  shade_p_value(observed_statistic,
                direction = "two-sided")
```

It looks like our observed statistic of `r observed_statistic` would be unlikely if there was truly no relationship between degree status and number of hours worked. More exactly, we'll use the randomization-based null distribution to calculate the p-value.

```{r}
#| label: p-value-2-sample
#| warning: false
#| message: false
# calculate the p value from the randomization-based null 
# distribution and the observed statistic
p_value_2_sample <- null_dist_2_sample |>
  get_p_value(obs_stat = observed_statistic,
              direction = "two-sided")

p_value_2_sample
```

Thus, if there were really no relationship between the number of hours worked a week and whether one has a college degree, the probability that we would see a statistic as or more extreme than `r observed_statistic` is approximately `r p_value_2_sample`.

Note that, similarly to the steps shown above, the package supplies a wrapper function, `t_test()`, to carry out 2-sample $t$-tests on tidy data. The syntax looks like this:

```{r}
#| label: 2-sample-t-test-wrapper
#| message: false
#| warning: false
t_test(x = gss, 
       formula = hours ~ college, 
       order = c("degree", "no degree"),
       alternative = "two-sided")
```

In the above example, we specified the relationship with the syntax `formula = hours ~ college`; we could have also written `response = hours, explanatory = college`.

An alternative approach to the `t_test()` wrapper is to calculate the observed statistic with an infer pipeline and then supply it to the `pt` function from base R. We can calculate the statistic as before, switching out the `stat = "diff in means"` argument with `stat = "t"`.

```{r}
# calculate the observed statistic
observed_statistic <- gss |>
  specify(hours ~ college) |>
  hypothesize(null = "point", mu = 40) |>
  calculate(stat = "t", order = c("degree", "no degree")) |>
  dplyr::pull()

observed_statistic
```

Note that this pipeline to calculate an observed statistic includes `hypothesize()` since the $t$ statistic requires a hypothesized mean value.

Then, juxtaposing that $t$ statistic with its associated distribution using the `pt()` function:

```{r}
pt(unname(observed_statistic), df = nrow(gss) - 2, lower.tail = FALSE)*2
```

Note that the results from these two theory-based approaches are nearly the same.