Showing preview only (1,257K chars total). Download the full file or copy to clipboard to get everything.
Repository: SteffenMoritz/imputeTS
Branch: master
Commit: bca3fcd312d5
Files: 180
Total size: 1.2 MB
Directory structure:
gitextract_ov74y_ad/
├── .Rbuildignore
├── .github/
│ ├── .gitignore
│ └── workflows/
│ ├── R-CMD-check.yaml
│ ├── pkgdown.yaml
│ ├── pr-commands.yaml
│ └── test-coverage.yaml
├── .gitignore
├── DESCRIPTION
├── LICENSE.txt
├── NAMESPACE
├── NEWS.md
├── R/
│ ├── .Rapp.history
│ ├── RcppExports.R
│ ├── deprecated_defunct.R
│ ├── ggplot_na_distribution.R
│ ├── ggplot_na_distribution2.R
│ ├── ggplot_na_gapsize.R
│ ├── ggplot_na_gapsize2.R
│ ├── ggplot_na_imputations.R
│ ├── imputeTS-package.R
│ ├── internal_algorithm_interface.R
│ ├── na_interpolation.R
│ ├── na_kalman.R
│ ├── na_locf.R
│ ├── na_ma.R
│ ├── na_mean.R
│ ├── na_random.R
│ ├── na_remove.R
│ ├── na_replace.R
│ ├── na_seadec.R
│ ├── na_seasplit.R
│ ├── statsNA.R
│ ├── tsAirgap.R
│ ├── tsAirgapComplete.R
│ ├── tsHeating.R
│ ├── tsHeatingComplete.R
│ ├── tsNH4.R
│ └── tsNH4Complete.R
├── README.md
├── _pkgdown.yaml
├── codecov.yml
├── data/
│ ├── tsAirgap.rda
│ ├── tsAirgapComplete.rda
│ ├── tsHeating.rda
│ ├── tsHeatingComplete.rda
│ ├── tsNH4.rda
│ └── tsNH4Complete.rda
├── docs/
│ ├── 404.html
│ ├── articles/
│ │ ├── gallery_visualizations.html
│ │ ├── gallery_visualizations_files/
│ │ │ ├── accessible-code-block-0.0.1/
│ │ │ │ └── empty-anchor.js
│ │ │ ├── header-attrs-2.16/
│ │ │ │ └── header-attrs.js
│ │ │ └── header-attrs-2.7/
│ │ │ └── header-attrs.js
│ │ └── index.html
│ ├── authors.html
│ ├── bootstrap-toc.css
│ ├── bootstrap-toc.js
│ ├── docsearch.css
│ ├── docsearch.js
│ ├── index.html
│ ├── news/
│ │ └── index.html
│ ├── pkgdown.css
│ ├── pkgdown.js
│ ├── pkgdown.yml
│ ├── reference/
│ │ ├── figures/
│ │ │ └── Cheat_Sheet_imputeTS.pptx
│ │ ├── ggplot_na_distribution.html
│ │ ├── ggplot_na_distribution2.html
│ │ ├── ggplot_na_gapsize.html
│ │ ├── ggplot_na_gapsize2.html
│ │ ├── ggplot_na_imputations.html
│ │ ├── ggplot_na_intervals.html
│ │ ├── ggplot_na_level.html
│ │ ├── ggplot_na_level2.html
│ │ ├── ggplot_na_pattern.html
│ │ ├── imputeTS-package.html
│ │ ├── imputeTS.html
│ │ ├── index.html
│ │ ├── na.interpolation.html
│ │ ├── na.kalman.html
│ │ ├── na.locf.html
│ │ ├── na.ma.html
│ │ ├── na.mean.html
│ │ ├── na.random.html
│ │ ├── na.remove.html
│ │ ├── na.replace.html
│ │ ├── na.seadec.html
│ │ ├── na.seasplit.html
│ │ ├── na_interpolation.html
│ │ ├── na_kalman.html
│ │ ├── na_locf.html
│ │ ├── na_ma.html
│ │ ├── na_mean.html
│ │ ├── na_random.html
│ │ ├── na_remove.html
│ │ ├── na_replace.html
│ │ ├── na_seadec.html
│ │ ├── na_seasplit.html
│ │ ├── plotNA.distribution.html
│ │ ├── plotNA.distributionBar.html
│ │ ├── plotNA.gapsize.html
│ │ ├── plotNA.imputations.html
│ │ ├── reexports.html
│ │ ├── statsNA.html
│ │ ├── tsAirgap.html
│ │ ├── tsAirgapComplete.html
│ │ ├── tsHeating.html
│ │ ├── tsHeatingComplete.html
│ │ ├── tsNH4.html
│ │ └── tsNH4Complete.html
│ └── sitemap.xml
├── imputeTS.Rproj
├── inst/
│ └── CITATION
├── man/
│ ├── ggplot_na_distribution.Rd
│ ├── ggplot_na_distribution2.Rd
│ ├── ggplot_na_gapsize.Rd
│ ├── ggplot_na_gapsize2.Rd
│ ├── ggplot_na_imputations.Rd
│ ├── ggplot_na_intervals.Rd
│ ├── imputeTS-package.Rd
│ ├── na.interpolation.Rd
│ ├── na.kalman.Rd
│ ├── na.locf.Rd
│ ├── na.ma.Rd
│ ├── na.mean.Rd
│ ├── na.random.Rd
│ ├── na.remove.Rd
│ ├── na.replace.Rd
│ ├── na.seadec.Rd
│ ├── na.seasplit.Rd
│ ├── na_interpolation.Rd
│ ├── na_kalman.Rd
│ ├── na_locf.Rd
│ ├── na_ma.Rd
│ ├── na_mean.Rd
│ ├── na_random.Rd
│ ├── na_remove.Rd
│ ├── na_replace.Rd
│ ├── na_seadec.Rd
│ ├── na_seasplit.Rd
│ ├── plotNA.distribution.Rd
│ ├── plotNA.distributionBar.Rd
│ ├── plotNA.gapsize.Rd
│ ├── plotNA.imputations.Rd
│ ├── reexports.Rd
│ ├── statsNA.Rd
│ ├── tsAirgap.Rd
│ ├── tsAirgapComplete.Rd
│ ├── tsHeating.Rd
│ ├── tsHeatingComplete.Rd
│ ├── tsNH4.Rd
│ └── tsNH4Complete.Rd
├── src/
│ ├── RcppExports.cpp
│ ├── locf.cpp
│ └── ma.cpp
├── tests/
│ ├── testthat/
│ │ ├── test-apply_base_algorithm.R
│ │ ├── test-depreciated_defunct.R
│ │ ├── test-error_handling.R
│ │ ├── test-ggplot_na_distribution.R
│ │ ├── test-ggplot_na_distribution2.R
│ │ ├── test-ggplot_na_gapsize.R
│ │ ├── test-ggplot_na_gapsize2.R
│ │ ├── test-ggplot_na_imputations.R
│ │ ├── test-input-na_advanced-tsObjects.R
│ │ ├── test-na_interpolation.R
│ │ ├── test-na_kalman.R
│ │ ├── test-na_locf.R
│ │ ├── test-na_ma.R
│ │ ├── test-na_mean.R
│ │ ├── test-na_random.R
│ │ ├── test-na_remove.R
│ │ ├── test-na_replace.R
│ │ ├── test-na_seadec.R
│ │ ├── test-na_seasplit.R
│ │ ├── test-parameter-maxgap.R
│ │ └── test-statsNA.R
│ └── testthat.R
└── vignettes/
├── Cheat_Sheet_imputeTS.pdf.asis
├── Cheat_Sheet_imputeTS.pptx
├── RJournal.sty
├── gallery_visualizations.Rmd
└── imputeTS-Time-Series-Missing-Value-Imputation-in-R.ltx
================================================
FILE CONTENTS
================================================
================================================
FILE: .Rbuildignore
================================================
imputeTS-header.png
imputeTS-header.jpg
imputeTS-logo1800x2100.png
^.*\.Rproj$
^\.Rproj\.user$
^.*\.yml$
^.*\.yaml$
^.*\.ini$
^.*\.txt$
^.*\.pptx$
^appveyor\.yml$
Icon
Icon?
Docs
^doc$
^Meta$
^revdep
^\.github$
^codecov\.yml$
================================================
FILE: .github/.gitignore
================================================
*.html
================================================
FILE: .github/workflows/R-CMD-check.yaml
================================================
# Workflow derived from https://github.com/r-lib/actions/tree/master/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
#
# NOTE: This workflow is overkill for most R packages and
# check-standard.yaml is likely a better choice.
# usethis::use_github_action("check-standard") will install it.
on:
push:
branches: [main, master]
pull_request:
branches: [main, master]
name: R-CMD-check
jobs:
R-CMD-check:
runs-on: ${{ matrix.config.os }}
name: ${{ matrix.config.os }} (${{ matrix.config.r }})
strategy:
fail-fast: false
matrix:
config:
- {os: macos-latest, r: 'release'}
- {os: windows-latest, r: 'release'}
# exercise an older Windows toolchain via oldrel-4 (R 4.1.x / rtools40 at the time of writing; oldrel-N shifts with every new R release)
- {os: windows-latest, r: 'oldrel-4'}
- {os: ubuntu-latest, r: 'release'}
- {os: ubuntu-latest, r: 'oldrel-1'}
- {os: ubuntu-latest, r: 'oldrel-2'}
- {os: ubuntu-latest, r: 'oldrel-3'}
- {os: ubuntu-latest, r: 'oldrel-4'}
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
R_KEEP_PKG_SOURCE: yes
steps:
- uses: actions/checkout@v4
- uses: r-lib/actions/setup-pandoc@v2
- uses: r-lib/actions/setup-tinytex@v2
- uses: r-lib/actions/setup-r@v2
with:
r-version: ${{ matrix.config.r }}
http-user-agent: ${{ matrix.config.http-user-agent }}
use-public-rspm: true
- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: rcmdcheck
- name: Install dependencies
run: |
tinytex::parse_install(text = "! LaTeX Error: File `etex.sty' not found.")
tinytex::parse_install(text = "! LaTeX Error: File `tikz.sty' not found.")
tinytex::parse_install(text = "! LaTeX Error: File `fancyhdr.sty' not found.")
tinytex::parse_install(text = "! LaTeX Error: File `microtype.sty' not found.")
tinytex::parse_install(text = "! LaTeX Error: File `setspace.sty' not found.")
tinytex::parse_install(text = "! LaTeX Error: File `titlesec.sty' not found.")
tinytex::parse_install(text = "! LaTeX Error: File `placeins.sty' not found.")
tinytex::parse_install(text = "! LaTeX Error: File `caption.sty' not found.")
tinytex::parse_install(text = "! LaTeX Error: File `environ.sty' not found.")
tinytex::parse_install(text = "! LaTeX Error: File `upquote.sty' not found.")
tinytex::parse_install(text = "! Font OML/zplm/m/it/9=zplmr7m at 9.0pt not loadable: Metric (TFM) file not found.")
tinytex::parse_install(text = "! Font T1/ppl/m/n/10=pplr8t at 10.0pt not loadable: Metric (TFM) file not found.")
shell: Rscript {0}
- uses: r-lib/actions/check-r-package@v2
- name: Show testthat output
if: always()
run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true
shell: bash
- name: Upload check results
if: failure()
uses: actions/upload-artifact@v4
with:
name: ${{ runner.os }}-r${{ matrix.config.r }}-results
path: check
================================================
FILE: .github/workflows/pkgdown.yaml
================================================
# Workflow derived from https://github.com/r-lib/actions/tree/master/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master]
release:
types: [published]
workflow_dispatch:
name: pkgdown
jobs:
pkgdown:
runs-on: ubuntu-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v4
- uses: r-lib/actions/setup-pandoc@v2
- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true
- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: pkgdown
needs: website
- name: Install the package
run: R CMD INSTALL .
- name: Deploy package
run: |
git config --local user.name "$GITHUB_ACTOR"
git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)'
================================================
FILE: .github/workflows/pr-commands.yaml
================================================
# Workflow derived from https://github.com/r-lib/actions/tree/master/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
issue_comment:
types: [created]
name: Commands
jobs:
document:
if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }}
name: document
runs-on: ubuntu-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v4
- uses: r-lib/actions/pr-fetch@v2
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true
- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: roxygen2
- name: Document
run: Rscript -e 'roxygen2::roxygenise()'
- name: commit
run: |
git config --local user.name "$GITHUB_ACTOR"
git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
git add man/\* NAMESPACE
git commit -m 'Document'
- uses: r-lib/actions/pr-push@v2
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
style:
if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }}
name: style
runs-on: ubuntu-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v4
- uses: r-lib/actions/pr-fetch@v2
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: r-lib/actions/setup-r@v2
- name: Install dependencies
run: Rscript -e 'install.packages("styler")'
- name: Style
run: Rscript -e 'styler::style_pkg()'
- name: commit
run: |
git config --local user.name "$GITHUB_ACTOR"
git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
git add \*.R
git commit -m 'Style'
- uses: r-lib/actions/pr-push@v2
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
================================================
FILE: .github/workflows/test-coverage.yaml
================================================
# Workflow derived from https://github.com/r-lib/actions/tree/master/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master]
pull_request:
branches: [main, master]
name: test-coverage
jobs:
test-coverage:
runs-on: ubuntu-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v4
- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true
- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: covr
- name: Test coverage
run: covr::codecov()
shell: Rscript {0}
================================================
FILE: .gitignore
================================================
.Rproj.user
/.Rhistory
/.dropbox
/desktop.ini
/.RData
Icon?
Icon
*.o
*.dll
*.so
doc
Meta
/doc/
/Meta/
================================================
FILE: DESCRIPTION
================================================
Package: imputeTS
Version: 3.4
Date: 2025-08-25
Title: Time Series Missing Value Imputation
Description: Imputation (replacement) of missing values
in univariate time series.
Offers several imputation functions
and missing data plots.
Available imputation algorithms include:
'Mean', 'LOCF', 'Interpolation',
'Moving Average', 'Seasonal Decomposition',
'Kalman Smoothing on Structural Time Series models',
'Kalman Smoothing on ARIMA models'. Published in Moritz and Bartz-Beielstein (2017)
<doi:10.32614/RJ-2017-009>.
Author: Steffen Moritz [aut, cre, cph] (<https://orcid.org/0000-0002-0085-1804>), Sebastian Gatscha [aut], Earo Wang [ctb] (<https://orcid.org/0000-0001-6448-5260>), Ron Hause [ctb] (<https://orcid.org/0000-0002-5229-7366>)
Authors@R:
c(
person("Steffen", "Moritz", email="steffen.moritz10@gmail.com", role=c("aut", "cre", "cph"), comment = c(ORCID = "0000-0002-0085-1804")),
person("Sebastian", "Gatscha", email="sebastian_gatscha@gmx.at", role="aut"),
person("Earo", "Wang", email = "earo.wang@gmail.com", role = c("ctb"), comment = c(ORCID = "0000-0001-6448-5260")),
person("Ron", "Hause", email = "ronaldhause@gmail.com", role = c("ctb"), comment = c(ORCID = "0000-0002-5229-7366"))
)
Maintainer: Steffen Moritz <steffen.moritz10@gmail.com>
LazyData: yes
Type: Package
ByteCompile: TRUE
BugReports: https://github.com/SteffenMoritz/imputeTS/issues
URL: https://github.com/SteffenMoritz/imputeTS, https://steffenmoritz.github.io/imputeTS/
Repository: CRAN
Depends:
R (>= 3.6)
Imports:
stats,
grDevices,
ggplot2 (>= 3.3.0),
ggtext,
stinepack,
forecast,
magrittr,
methods,
Rcpp
Suggests:
testthat, R.rsp, knitr, zoo, timeSeries, tis, xts, tibble, tsibble, rmarkdown, covr
License: GPL-3
VignetteBuilder: R.rsp, knitr, rmarkdown
RoxygenNote: 7.3.2
Roxygen: list(markdown = TRUE)
LinkingTo: Rcpp
Encoding: UTF-8
================================================
FILE: LICENSE.txt
================================================
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
{one line to give the program's name and a brief idea of what it does.}
Copyright (C) {year} {name of author}
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
{project} Copyright (C) {year} {fullname}
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<http://www.gnu.org/licenses/>.
The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<http://www.gnu.org/philosophy/why-not-lgpl.html>.
================================================
FILE: NAMESPACE
================================================
# Generated by roxygen2: do not edit by hand
export("%>%")
export(ggplot_na_distribution)
export(ggplot_na_distribution2)
export(ggplot_na_gapsize)
export(ggplot_na_gapsize2)
export(ggplot_na_imputations)
export(ggplot_na_intervals)
export(na.interpolation)
export(na.kalman)
export(na.locf)
export(na.ma)
export(na.mean)
export(na.random)
export(na.remove)
export(na.replace)
export(na.seadec)
export(na.seasplit)
export(na_interpolation)
export(na_kalman)
export(na_locf)
export(na_ma)
export(na_mean)
export(na_random)
export(na_remove)
export(na_replace)
export(na_seadec)
export(na_seasplit)
export(plotNA.distribution)
export(plotNA.distributionBar)
export(plotNA.gapsize)
export(plotNA.imputations)
export(statsNA)
import(stats)
importFrom(Rcpp,sourceCpp)
importFrom(forecast,auto.arima)
importFrom(forecast,findfrequency)
importFrom(ggplot2,aes)
importFrom(ggplot2,after_stat)
importFrom(ggplot2,alpha)
importFrom(ggplot2,coord_flip)
importFrom(ggplot2,element_blank)
importFrom(ggplot2,element_text)
importFrom(ggplot2,geom_bar)
importFrom(ggplot2,geom_line)
importFrom(ggplot2,geom_point)
importFrom(ggplot2,geom_text)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,ggtitle)
importFrom(ggplot2,guide_legend)
importFrom(ggplot2,guides)
importFrom(ggplot2,labs)
importFrom(ggplot2,position_dodge)
importFrom(ggplot2,scale_color_manual)
importFrom(ggplot2,scale_fill_gradientn)
importFrom(ggplot2,scale_fill_manual)
importFrom(ggplot2,scale_size_identity)
importFrom(ggplot2,scale_x_continuous)
importFrom(ggplot2,scale_x_discrete)
importFrom(ggplot2,scale_y_continuous)
importFrom(ggplot2,stat_bin)
importFrom(ggplot2,theme)
importFrom(ggplot2,theme_classic)
importFrom(ggplot2,theme_linedraw)
importFrom(ggplot2,xlab)
importFrom(ggplot2,ylab)
importFrom(ggtext,element_markdown)
importFrom(grDevices,heat.colors)
importFrom(grDevices,nclass.Sturges)
importFrom(magrittr,"%>%")
importFrom(methods,hasArg)
importFrom(stats,KalmanRun)
importFrom(stats,KalmanSmooth)
importFrom(stats,StructTS)
importFrom(stats,approx)
importFrom(stats,arima)
importFrom(stats,frequency)
importFrom(stats,median)
importFrom(stats,runif)
importFrom(stats,spline)
importFrom(stats,stl)
importFrom(stats,ts)
importFrom(stinepack,stinterp)
importFrom(utils,globalVariables)
useDynLib(imputeTS)
================================================
FILE: NEWS.md
================================================
# Changes in Version 3.4
* Added ggplot_na_gapsize2 plot (and unit tests).
Nice way to illustrate how different NA gap sizes (consecutive NAs in a row) contribute to the total number of NAs.
* Fix of the CITATION file to comply with newer CRAN rules
* Update of ggplot_na_imputations() to avoid using now deprecated ggplot2 options
* Update of unit tests for all plotting functions (ggplot_na_...). Now using is_ggplot() to check for correct output.
This was necessary because of a major ggplot2 update (switch to s7 classes).
# Changes in Version 3.3
Thanks to Sabrina Krys, Kevin Villalobos, Tracy Shen, hezhichao1991, englianhu for bug / issue reporting.
Thanks to RicardaP for fixing documentation error.
Thanks to Ronald Hause for the commit to optimize parameter pass-through from approx to na_interpolation.
* Renamed ggplot_na_intervals to ggplot_na_distribution2
* Updates to ggplot_na_gapsize: Space between the bars adjusted for better optics.
Added parameters for directly choosing the bar border color and alpha value for
filling of the bars.
* Improved notification message for na_seadec/na_seasplit when find_frequency couldn't find
a seasonal pattern.
* Corrected error in na_kalman documentation - auto.arima was wrongly described as default parameter choice, while in reality it is StructTS
(reported by RicardaP)
* Changes for the error handling. (**These changes got reverted and did not make it into the CRAN release**). For some specific cases the input checks performed by imputeTS stop pipe workflows in their entirety. E.g. a problem when group_by leads to all NA subsets - which fail the input check and then stop the whole pipe workflow. To prevent this, stop() is only called when the user-supplied imputeTS algorithm parameter options are wrong or misspelled. Unsupported input data will only give a warning() (and no action is performed on the data). Thus, there is no call to stop() that cancels the whole pipe workflow. (issue reported by Sabrina Krys). This works fine, but after closer consideration we figured people fail to notice warnings way too often and thus it is more user friendly to clearly stop with an error for these issues. After all, the user's data analysis clearly benefits from taking a closer look in these specific cases. If you are nevertheless interested in the version that still contains these (later reverted) changes, it can be installed from GitHub with the following command: devtools::install_github("https://github.com/SteffenMoritz/imputeTS/commit/aaf759216b4091e36dee6e8e3a10185ff8f4647b")
* Improved error messages (especially for multivariate inputs) and unit tests for the warnings and errors.
* Corrected typo in 'Input data needs at least x non-NA data points' error message
* Better parameter pass-through from approx to na_interpolation - Added capability to alter rule for linear extrapolation outside the interval [min(x), max(x)]
(commit by Ronald Hause)
* Improved na_interpolation documentation (more information about possible parameter pass
through from underlying spline, approx,stinterp functions)
* Additional unit tests
* Moved to Github Actions instead of TravisCI / AppVeyor.
* Bugfix for "Error in optim(init[mask], getLike, method = "L-BFGS-B", lower = rep(0, : L-BFGS-B needs finite values of 'fn'.", which occurs for completely constant input to na_kalman e.g. 4,4,4,NA,4,4. (reported by Kevin Villalobos, Tracy Shen, hezhichao1991, englianhu)
* Improved na_seadec documentation (algorithm details)
* Changed R Version requirement in Description to R (>= 3.6) since imported packages like ggtext and also some testthat tests were already requiring newer versions than the old R (≥ 3.0.1) requirement of imputeTS
# Changes in Version 3.2
Thanks to Mark J. Lamias for bug / issue reporting.
Thanks to Cyrus Mohammadian for bug reporting.
Thanks to Miroslaw Janik for issue reporting.
* Fix to remove CRAN note - removed unused utils from DESCRIPTION imports
* Minor fix to ggplot_na_distribution (bars end now at max(timeseries)*1.05)
* Typo corrections in statsNA
* Specified ggplot2 (>= 3.3.0) in imports, to prevent errors with older ggplot2 versions (reported by Cyrus Mohammadian)
* Updated na_locf documentation to make behavior of na_remaining parameter more clear (issue reported by Mark J. Lamias)
* ggplot_na_intervals now has percentages with % sign (e.g. 10%) on y-scale instead of just numbers (e.g. 0.1)
(suggestion from Miroslaw Janik)
* Added some figures and the Cheat Sheet .pptx to .Rbuildignore to avoid CRAN warning about
package size. These files and figures were not needed for the CRAN version.
# Changes in Version 3.1
Thanks to Johannes Menzel for bug reporting, Thanks to Jan (jmablans) for bug reporting.
Thanks to Earo Wang for speedup of plotNA.gapsize.
Special Thanks to Sebastian Gatscha for plotting functions, new na_mean options, new unit tests.
* Plotting functions are all in ggplot now (way better looking). Additionally they got renamed accordingly ggplot_na_distribution, ggplot_na_intervals, ggplot_na_gapsize, ggplot_na_imputations.
* Speedup for plotNA_gapsize calculation (now renamed ggplot_na_gapsize) (thx to Earo Wang)
* Added harmonic and geometric mean as option for na_mean
* Removed bug in na_replace - it can now be used with all NA vectors since
it requires no minimum of non-NA values (reported by Jan - jmablans)
* Improved na.random input check (usable with all NA input now if upper and lower bound
parameters are explicitly set to numeric values)
* Additional unit tests for the plotting functions
* Additional unit tests for all imputation functions (testing all NA input)
* Update for testthat unit tests
* Fixed a mistake in README.md (reported by Johannes Menzel)
* Added to statsNA: Number of Gaps, Average Gap Size + reformatting of code +
compatibility with other ts objects
* Documentation improvements through newer roxygen version (Markup now possible in documentation)
* updated Readme + Vignette to new function names
* Added the imputeTS Cheat Sheet as Vignette
* Added new vignette Gallery Missing Data Visualizations
* Added revdep
# Changes in Version 3.0
Thanks to Jim Maas, shreydesai, Breza, CameronNemo for reporting bugs.
Thanks to Sebastian Gatscha providing the (way faster) C++ na.ma() implementation.
* tibble and tstibble compatibility
* Reworked internal code documentation
* na.ma speed up via C++
* Changed vignette builder to R.rsp
* Used R package styler package to optimize source code readability
* Made some changes to better follow tidyverse style guide
* Replaced na. with na_ e.g. na.mean with na_mean etc. This fits better to modern code
style guidelines. The old function names will still work for a while,
but give a warning.
* Added findFrequency option to na.seadec and na.seasplit
* Added maxgap option
* Fixed bug for na.seadec - also imputed known values in some special cases
(reported by CameronNemo)
* Added doi: 10.32614/RJ-2017-009 to description, references, readme and citation file
* Added StackExchange link to Readme
* Moved stinepack from imports to suggested
* Internal reorganization of imports - now always using pkg::function and importFrom pkg x1 x2 x3 instead of just import pkg
* Fixed bug in na.ma when using xts time series with NA at the end
* Fixed error message in na.interpolation if wrong parameter is given
stop("Wrong parameter 'option' given. Value must be either 'linear', 'spline' or 'stine'.")
(reported by Breza)
* Fixed spelling mistakes in na.seadec and na.seasplit (reported by shreydesai)
* Fixed bug with na.random() output (reported by Jim Maas)
# Changes in Version 2.7
* Updated Description: Orcid Id added, packages required for unit tests added as "Suggested"
* Small correction in README.md, small update to citation file
* Replaced NEWS with NEWS.md for better formatting
# Changes in Version 2.6
* Updated citation file
* Minor changes to vignette
# Changes in Version 2.5
* Adjusted unit test to an update of the forecast package
# Changes in Version 2.4
* Small speed improvements for na.kalman
* Improved input check for all functions
* Bugfix for unit tests
* Changes to unit test (because of zoo update)
# Changes in Version 2.3
* Bugfix for na.kalman with integer input
* Readme Update
* Improved error messages for na.seasplit and na.seadec
* Minor vignette changes
# Changes in Version 2.2
* Bugfix for na.locf (also concerned na.kalman)
# Changes in Version 2.1
* Fixes for problems with Solaris/Sparc
* Fixes for problems with vignette on osx
# Changes in Version 2.0
* Bugfix for plots without missing data
* Increased performance for na.locf
* Minor bugfixes for specific data.frame inputs
* Minor bugfixes for specific xts object inputs
* Improved Code Documentation
* Added new software tests
# Changes in Version 1.9
* Added Vignette
# Changes in Version 1.8
* Computation time improvements for na.locf (up to 10000 times faster)
* Computation time improvements for na.interpolation (up to 10000 times faster)
* Computation time improvements for na.kalman (only slightly faster, under 10%)
* Fixed unnecessary warning message with some na.kalman options
* Adjusted default parameters for plotNA.distributionBar (using nclass.Sturges for breaks parameter)
* Fixed issue with too sensitive input checking
# Changes in Version 1.7
* Enabled usage of multivariate input (data.frame, mts, matrix,...) for all imputation
functions except na.remove. This means users do not have to loop through all columns
by themselves anymore if they want to use the package with multivariate data. The
imputation itself is still performed in a univariate manner (column after column).
* Improved compatibility with different advanced time series objects like zoo and xts.
Using the imputation functions with these time series objects should be possible now.
These series will not be explicitly named as possible input in the user documentation.
Absence of errors can not be guaranteed. However, there are no known issues yet.
* Added several things for unit tests with pkg 'testthat'
* Added unit tests for every function
* Adjusted error messages
* Internal Coding style improvement: replaced all T with TRUE and all F with FALSE
* Adjustment tsHeating / tsHeatingComplete datasets (set 1440 as frequency parameter)
* Adjustment tsNH4 / tsNH4Complete datasets (set 144 as frequency parameter)
* Fixes for grammar, spelling and citations in the whole documentation
* Revised examples in the documentation for all functions
* Restricted output of na.remove to vector only (issue with incorrect time information otherwise)
* Added better x-axes labels for plotNA.distribution
# Changes in Version 1.6
* Added github links to description file
* Added citation file
* Updated Readme (badges for travis ci and cran status)
* Fix in documentation for na.interpolation
(due to outdated descriptions)
* Fix in documentation plotNA.distribution / plotNA.distributionBar
(due to interchanged descriptions)
* Added references to used packages in na.kalman and na.interpolation documentation
# Changes in Version 1.5
* Allows now also numeric vectors as input
* Removed na.identifier parameter for all functions (too error prone, better handled
individually by the user)
* Minor changes in na.interpolation with option = "stine"
* Added na.ma imputation function
* Replaced "data" in all function parameters with the more common "x"
* Improvement of all code examples
* Renamed heating/heatingComplete dataset to tsHeating/tsHeatingComplete
* Renamed nh4/nh4Complete dataset to tsNH4/tsNH4Complete
* Added tsAirgap / tsAirgapComplete datasets
* Improved imputeTS-package documentation
* Added na.kalman imputation function
* Added README.md function
* Added statsNA function
* Added plotNA.gapsize function
* Renamed vis.imputations to plotNA.imputations
* Renamed vis.barMissing to plotNA.distributionBar
* Renamed vis.missing to plotNA.distribution
* Fixed issues with parameter pass through and legend for
all plotting functions
* Improved dataset documentation
# Changes in Version 0.4
* Update of vis.differences (better looking plot now)
* Added vis.missing to visualize the distribution of missing data in a time series
* Added vis.barMissing, which is especially suited to visualize missing data in very huge time series
* Update na.interpolate (added Stineman interpolation and enabled ... parameter
for all interpolation algorithms to pass through parameters to the underlying functions)
# Changes in Version 0.3
* Added two datasets of sensor data
* vis.differences for plotting differences between real and imputed values
# Changes in Version 0.2
* Removed internal functions from visible package documentation
* Added additional algorithms: na.seasplit and na.seadec
* internal function for algorithm selection
# Changes in Version 0.1
* Created initial version of imputeTS package for univariate time series imputation
* added the simple imputation functions: na.locf, na.mean, na.random, na.interpolation,
na.replace
* added na.remove function for removing all NAs from a time series
================================================
FILE: R/.Rapp.history
================================================
================================================
FILE: R/RcppExports.R
================================================
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
# Thin R-level entry point for the native routine registered as '_imputeTS_locf'.
# This file is generated by Rcpp::compileAttributes() (see the header above), so
# the wrapper performs no argument checking of its own.
# NOTE(review): the expected types of `x` and `reverse` are defined by the
# compiled code, which is not visible here - confirm against the C++ source.
locf <- function(x, reverse) {
    # Forward both arguments unchanged to the compiled implementation.
    .Call('_imputeTS_locf', PACKAGE = 'imputeTS', x, reverse)
}
# Thin R-level entry point for the native routine registered as '_imputeTS_ma'.
# This file is generated by Rcpp::compileAttributes() (see the header above), so
# the wrapper performs no argument checking of its own.
# NOTE(review): the expected types of `x`, `k` and `weighting` are defined by the
# compiled code, which is not visible here - confirm against the C++ source.
ma <- function(x, k, weighting) {
    # Forward all arguments unchanged to the compiled implementation.
    .Call('_imputeTS_ma', PACKAGE = 'imputeTS', x, k, weighting)
}
================================================
FILE: R/deprecated_defunct.R
================================================
#--------------------------------------------------------------#
# Collection of DEPRECATED AND DEFUNCT FUNCTIONS
#--------------------------------------------------------------#
#--------------------------------------------------------------#
# IMPUTATION FUNCTIONS
# Old na. imputation functions, replaced by na_
# Deprecated since Version 3.0 (2019-07-01)
#--------------------------------------------------------------#
# na.interpolation()
# replaced by na_interpolation
#--------------------------------------------------------------------------------------#
#' Deprecated use \code{\link[imputeTS]{na_interpolation}} instead.
#' @description na.interpolation is replaced by \code{\link[imputeTS]{na_interpolation}}.
#' The functionality stays the same. The new name better fits modern R code
#' style guidelines (which prefer _ over . in function names).
#' @inheritParams na_interpolation
#' @keywords internal
#' @export
na.interpolation <- function(x, option = "linear", maxgap = Inf, ...) {
  # Signal the deprecation first, then delegate to the renamed function.
  # Arguments are handed on positionally, exactly as received.
  deprecation_note <- "na.interpolation will be replaced by na_interpolation.
Functionality stays the same.
The new function name better fits modern R code style guidelines.
Please adjust your code accordingly."
  .Deprecated(msg = deprecation_note, old = "na.interpolation", new = "na_interpolation")
  na_interpolation(x, option, maxgap, ...)
}
#--------------------------------------------------------------------------------------#
# na.kalman()
# replaced by na_kalman
#--------------------------------------------------------------------------------------#
#' Deprecated use \code{\link[imputeTS]{na_kalman}} instead.
#' @description na.kalman is replaced by \code{\link[imputeTS]{na_kalman}}.
#' The functionality stays the same. The new name better fits modern R code
#' style guidelines (which prefer _ over . in function names).
#' @inheritParams na_kalman
#' @keywords internal
#' @export
na.kalman <- function(x, model = "StructTS", smooth = TRUE, nit = -1, maxgap = Inf, ...) {
  # Signal the deprecation first, then delegate to the renamed function.
  # Arguments are handed on positionally, exactly as received.
  deprecation_note <- "na.kalman will be replaced by na_kalman.
Functionality stays the same.
The new function name better fits modern R code style guidelines.
Please adjust your code accordingly."
  .Deprecated(msg = deprecation_note, old = "na.kalman", new = "na_kalman")
  na_kalman(x, model, smooth, nit, maxgap, ...)
}
#--------------------------------------------------------------------------------------#
# na.locf()
# replaced by na_locf
#--------------------------------------------------------------------------------------#
#' Deprecated use \code{\link[imputeTS]{na_locf}} instead.
#' @description na.locf is replaced by \code{\link[imputeTS]{na_locf}}.
#' The functionality stays the same. The new name better fits modern R code
#' style guidelines (which prefer _ over . in function names).
#' @inheritParams na_locf
#' @keywords internal
#' @export
na.locf <- function(x, option = "locf", na.remaining = "rev", maxgap = Inf, ...) {
  # Signal the deprecation first, then delegate to the renamed function.
  # Arguments are handed on positionally, exactly as received.
  deprecation_note <- "na.locf will be replaced by na_locf.
Functionality stays the same.
The new function name better fits modern R code style guidelines.
Please adjust your code accordingly."
  .Deprecated(msg = deprecation_note, old = "na.locf", new = "na_locf")
  na_locf(x, option, na.remaining, maxgap, ...)
}
#--------------------------------------------------------------------------------------#
# na.ma()
# replaced by na_ma
#--------------------------------------------------------------------------------------#
#' Deprecated use \code{\link[imputeTS]{na_ma}} instead.
#' @description na.ma is replaced by \code{\link[imputeTS]{na_ma}}.
#' The functionality stays the same. The new name better fits modern R code
#' style guidelines (which prefer _ over . in function names).
#' @inheritParams na_ma
#' @keywords internal
#' @export
na.ma <- function(x, k = 4, weighting = "exponential", maxgap = Inf, ...) {
  # Signal the deprecation first, then delegate to the renamed function.
  # Arguments are handed on positionally, exactly as received.
  deprecation_note <- "na.ma will be replaced by na_ma.
Functionality stays the same.
The new function name better fits modern R code style guidelines.
Please adjust your code accordingly."
  .Deprecated(msg = deprecation_note, old = "na.ma", new = "na_ma")
  na_ma(x, k, weighting, maxgap, ...)
}
#--------------------------------------------------------------------------------------#
# na.mean()
# replaced by na_mean
#--------------------------------------------------------------------------------------#
#' Deprecated use \code{\link[imputeTS]{na_mean}} instead.
#' @description na.mean is replaced by \code{\link[imputeTS]{na_mean}}.
#' The functionality stays the same. The new name better fits modern R code
#' style guidelines (which prefer _ over . in function names).
#' @inheritParams na_mean
#' @keywords internal
#' @export
na.mean <- function(x, option = "mean", maxgap = Inf, ...) {
  # Signal the deprecation first, then delegate to the renamed function.
  # Arguments are handed on positionally, exactly as received.
  deprecation_note <- "na.mean will be replaced by na_mean.
Functionality stays the same.
The new function name better fits modern R code style guidelines.
Please adjust your code accordingly."
  .Deprecated(msg = deprecation_note, old = "na.mean", new = "na_mean")
  na_mean(x, option, maxgap, ...)
}
#--------------------------------------------------------------------------------------#
# na.random()
# replaced by na_random
#--------------------------------------------------------------------------------------#
#' Deprecated use \code{\link[imputeTS]{na_random}} instead.
#' @description na.random is replaced by \code{\link[imputeTS]{na_random}}.
#' The functionality stays the same. The new name better fits modern R code
#' style guidelines (which prefer _ over . in function names).
#' @inheritParams na_random
#' @keywords internal
#' @export
na.random <- function(x, lower_bound = NULL, upper_bound = NULL, maxgap = Inf, ...) {
  # Signal the deprecation first, then delegate to the renamed function.
  # Arguments are handed on positionally, exactly as received.
  deprecation_note <- "na.random will be replaced by na_random.
Functionality stays the same.
The new function name better fits modern R code style guidelines.
Please adjust your code accordingly."
  .Deprecated(msg = deprecation_note, old = "na.random", new = "na_random")
  na_random(x, lower_bound, upper_bound, maxgap, ...)
}
#--------------------------------------------------------------------------------------#
# na.remove()
# replaced by na_remove
#--------------------------------------------------------------------------------------#
#' Deprecated use \code{\link[imputeTS]{na_remove}} instead.
#' @description na.remove is replaced by \code{\link[imputeTS]{na_remove}}.
#' The functionality stays the same. The new name better fits modern R code
#' style guidelines (which prefer _ over . in function names).
#' @inheritParams na_remove
#' @keywords internal
#' @export
na.remove <- function(x, ...) {
  # Signal the deprecation first, then delegate to the renamed function.
  deprecation_note <- "na.remove will be replaced by na_remove.
Functionality stays the same.
The new function name better fits modern R code style guidelines.
Please adjust your code accordingly."
  .Deprecated(msg = deprecation_note, old = "na.remove", new = "na_remove")
  na_remove(x, ...)
}
#--------------------------------------------------------------------------------------#
# na.replace()
# replaced by na_replace
#--------------------------------------------------------------------------------------#
#' Deprecated use \code{\link[imputeTS]{na_replace}} instead.
#' @description na.replace is replaced by \code{\link[imputeTS]{na_replace}}.
#' The functionality stays the same. The new name better fits modern R code
#' style guidelines (which prefer _ over . in function names).
#' @inheritParams na_replace
#' @keywords internal
#' @export
na.replace <- function(x, fill = 0, maxgap = Inf, ...) {
  # Signal the deprecation first, then delegate to the renamed function.
  # Arguments are handed on positionally, exactly as received.
  deprecation_note <- "na.replace will be replaced by na_replace.
Functionality stays the same.
The new function name better fits modern R code style guidelines.
Please adjust your code accordingly."
  .Deprecated(msg = deprecation_note, old = "na.replace", new = "na_replace")
  na_replace(x, fill, maxgap, ...)
}
#--------------------------------------------------------------------------------------#
# na.seadec()
# replaced by na_seadec
#--------------------------------------------------------------------------------------#
#' Deprecated use \code{\link[imputeTS]{na_seadec}} instead.
#' @description na.seadec is replaced by \code{\link[imputeTS]{na_seadec}}.
#' The functionality stays the same. The new name better fits modern R code
#' style guidelines (which prefer _ over . in function names).
#' @inheritParams na_seadec
#' @keywords internal
#' @export
na.seadec <- function(x, algorithm = "interpolation", find_frequency = FALSE, maxgap = Inf, ...) {
  # Signal the deprecation first, then delegate to the renamed function.
  # Arguments are handed on positionally, exactly as received.
  deprecation_note <- "na.seadec will be replaced by na_seadec.
Functionality stays the same.
The new function name better fits modern R code style guidelines.
Please adjust your code accordingly."
  .Deprecated(msg = deprecation_note, old = "na.seadec", new = "na_seadec")
  na_seadec(x, algorithm, find_frequency, maxgap, ...)
}
#--------------------------------------------------------------------------------------#
# na.seasplit()
# replaced by na_seasplit
#--------------------------------------------------------------------------------------#
#' Deprecated use \code{\link[imputeTS]{na_seasplit}} instead.
#' @description na.seasplit is replaced by \code{\link[imputeTS]{na_seasplit}}.
#' The functionality stays the same. The new name better fits modern R code
#' style guidelines (which prefer _ over . in function names).
#' @inheritParams na_seasplit
#' @keywords internal
#' @export
na.seasplit <- function(x, algorithm = "interpolation", find_frequency = FALSE, maxgap = Inf, ...) {
  # Signal the deprecation first, then delegate to the renamed function.
  # Arguments are handed on positionally, exactly as received.
  deprecation_note <- "na.seasplit will be replaced by na_seasplit.
Functionality stays the same.
The new function name better fits modern R code style guidelines.
Please adjust your code accordingly."
  .Deprecated(msg = deprecation_note, old = "na.seasplit", new = "na_seasplit")
  na_seasplit(x, algorithm, find_frequency, maxgap, ...)
}
#--------------------------------------------------------------------------------------#
#--------------------------------------------------------------#
# PLOTTING FUNCTIONS
# Old plotNA. visualization functions, replaced by ggplot_na_
# Deprecated since Version 3.1 (2020-07-30)
#--------------------------------------------------------------#
# plotNA.distribution()
# replaced by ggplot_na_distribution
#--------------------------------------------------------------------------------------#
#' @title Discontinued - Use \code{\link[imputeTS]{ggplot_na_distribution}} instead.
#' @description plotNA.distribution was replaced by \code{\link[imputeTS]{ggplot_na_distribution}}.
#' The new plotting function provides an improved version of the old plot, e.g. it looks better now and is better adjustable,
#' because it is based on ggplot2. If you absolutely want to use the old function,
#' you need to download an older package version. Versions 3.0 and below still have the old functions.
#' @keywords internal
#' @export
plotNA.distribution <- function(x, ...) {
  # Defunct stub: always raises an error that points to the replacement.
  # Neither `x` nor `...` is evaluated.
  defunct_note <- "
plotNA.distribution was replaced by ggplot_na_distribution.
Use this function instead.
The plot itself is the same, but looks better now and is better adjustable, because it is based on ggplot2.
If you absolutely want to use the old function, you need to manually download an older package version.
(Versions 3.0 and below still have the old functions)"
  .Defunct(msg = defunct_note, new = "ggplot_na_distribution")
}
#--------------------------------------------------------------------------------------#
# ggplot_na_intervals()
# replaced by ggplot_na_distribution2
#--------------------------------------------------------------------------------------#
#' @title Discontinued - Use \code{\link[imputeTS]{ggplot_na_distribution2}} instead.
#' @description ggplot_na_intervals was renamed to \code{\link[imputeTS]{ggplot_na_distribution2}}.
#' The new plotting function provides an improved version of the old plot e.g. it looks better now and is better adjustable,
#' because it is based on ggplot2. If you absolutely want to use the old function,
#' you need to download an older package version. Versions 3.0 and below still have the old functions.
#' @keywords internal
#' @export
ggplot_na_intervals <- function(x, ...) {
  # Defunct stub: always raises an error that points to the renamed function.
  # Neither `x` nor `...` is evaluated.
  defunct_note <- "
ggplot_na_intervals was renamed to ggplot_na_distribution2.
Use this function instead.
Functionality stays the same. "
  .Defunct(msg = defunct_note, new = "ggplot_na_distribution2")
}
#--------------------------------------------------------------------------------------#
# plotNA.distributionBar()
# replaced by ggplot_na_distribution2
#--------------------------------------------------------------------------------------#
#' @title Discontinued - Use \code{\link[imputeTS]{ggplot_na_distribution2}} instead.
#' @description plotNA.distributionBar was replaced by \code{\link[imputeTS]{ggplot_na_distribution2}}.
#' The new plotting function provides an improved version of the old plot e.g. it looks better now and is better adjustable,
#' because it is based on ggplot2. If you absolutely want to use the old function,
#' you need to download an older package version. Versions 3.0 and below still have the old functions.
#' @keywords internal
#' @export
plotNA.distributionBar <- function(x, ...) {
  # Defunct stub: always raises an error that points to the replacement.
  # Neither `x` nor `...` is evaluated.
  defunct_note <- "
plotNA.distributionBar was replaced by ggplot_na_distribution2.
Use this function instead.
The plot itself is the same, but looks better now and is better adjustable, because it is based on ggplot2.
If you absolutely want to use the old function, you need to manually download an older package version.
(Versions 3.0 and below still have the old functions)"
  .Defunct(msg = defunct_note, new = "ggplot_na_distribution2")
}
#--------------------------------------------------------------------------------------#
# plotNA.gapsize()
# replaced by ggplot_na_gapsize
#--------------------------------------------------------------------------------------#
#' @title Discontinued - Use \code{\link[imputeTS]{ggplot_na_gapsize}} instead.
#' @description plotNA.gapsize was replaced by \code{\link[imputeTS]{ggplot_na_gapsize}}.
#' The new plotting function provides an improved version of the old plot e.g. it looks better now and is better adjustable,
#' because it is based on ggplot2. If you absolutely want to use the old function,
#' you need to download an older package version. Versions 3.0 and below still have the old functions.
#' @keywords internal
#' @export
plotNA.gapsize <- function(x, ...) {
  # Defunct stub: always raises an error that points to the replacement.
  # Neither `x` nor `...` is evaluated.
  defunct_note <- "
plotNA.gapsize was replaced by ggplot_na_gapsize.
Use this function instead.
The plot itself is the same, but looks better now and is better adjustable, because it is based on ggplot2.
If you absolutely want to use the old function, you need to manually download an older package version.
(Versions 3.0 and below still have the old functions)"
  .Defunct(msg = defunct_note, new = "ggplot_na_gapsize")
}
#--------------------------------------------------------------------------------------#
# plotNA.imputations()
# replaced by ggplot_na_imputations
#--------------------------------------------------------------------------------------#
#' @title Discontinued - Use \code{\link[imputeTS]{ggplot_na_imputations}} instead.
#' @description plotNA.imputations was replaced by \code{\link[imputeTS]{ggplot_na_imputations}}.
#' The new plotting function provides an improved version of the old plot e.g. it looks better now and is better adjustable,
#' because it is based on ggplot2. If you absolutely want to use the old function,
#' you need to download an older package version. Versions 3.0 and below still have the old functions.
#' @keywords internal
#' @export
plotNA.imputations <- function(x, ...) {
  # Defunct stub: always raises an error that points to the replacement.
  # Neither `x` nor `...` is evaluated.
  defunct_note <- "
plotNA.imputations was replaced by ggplot_na_imputations.
Use this function instead.
The plot itself is the same, but looks better now and is better adjustable, because it is based on ggplot2.
If you absolutely want to use the old function, you need to manually download an older package version.
(Versions 3.0 and below still have the old functions)"
  .Defunct(msg = defunct_note, new = "ggplot_na_imputations")
}
#--------------------------------------------------------------------------------------#
================================================
FILE: R/ggplot_na_distribution.R
================================================
#' @title Line Plot to Visualize the Distribution of Missing Values
#'
#' @description Visualize the distribution of missing values within a time series.
#'
#' @param x Numeric Vector (\code{\link[base]{vector}}) or Time Series
#' (\code{\link[stats]{ts}}) object containing NAs. This is the only mandatory
#' parameter - all other parameters are only needed for adjusting the plot appearance.
#'
#' @param x_axis_labels For adding specific x-axis labels. Takes a vector of
#' \code{\link[base]{Date}} or \code{\link[base]{POSIXct}} objects
#' as an input (needs the same length as x). The default (NULL) uses the
#' observation numbers as x-axis tick labels.
#'
#' @param color_points Color for the Symbols/Points.
#'
#' @param color_lines Color for the Lines.
#'
#' @param color_missing Color used for highlighting the time spans with NA values.
#'
#' @param color_missing_border Color used as border for time spans with NA values.
#'
#' @param alpha_missing Alpha (transparency) value used for color_missing.
#'
#' @param title Title of the Plot (NULL for deactivating title).
#'
#' @param subtitle Subtitle of the Plot (NULL for deactivating subtitle).
#'
#' @param xlab Label for x-Axis.
#'
#' @param ylab Label for y-Axis.
#'
#' @param shape_points Symbol to use for the Observations/Points. See
#' https://ggplot2.tidyverse.org/articles/ggplot2-specs.html as reference.
#'
#' @param size_points Size of Symbols/Points.
#'
#' @param theme Set a Theme for ggplot2. Default is ggplot2::theme_linedraw().
#' (\code{\link[ggplot2]{theme_linedraw}})
#'
#' @details This function visualizes the distribution of missing values within
#' a time series. If a value is NA, the background is colored differently.
#' This gives a good overview of where most missing values occur.
#'
#' The only really needed parameter for this function is x (the univariate
#' time series that shall be visualized). All other parameters are solely
#' for altering the appearance of the plot.
#'
#' As long as the input is univariate and numeric the function also takes
#' data.frame, tibble, tsibble, zoo, xts as an input.
#'
#' The plot can be adjusted to your needs via the function parameters.
#' Additionally, for more complex adjustments, the output can also be
#' adjusted via ggplot2 syntax. This is possible, since the output
#' of the function is a ggplot2 object. Also take a look at the Examples
#' to see how adjustments are made.
#'
#' For very long time series it might happen, that the plot gets too crowded
#' and overplotting issues occur. In this case the
#' \code{\link[imputeTS]{ggplot_na_distribution2}} plotting function can provide
#' a more condensed overview.
#'
#'
#' @author Steffen Moritz, Sebastian Gatscha
#'
#' @seealso \code{\link[imputeTS]{ggplot_na_distribution2}},
#' \code{\link[imputeTS]{ggplot_na_gapsize}},
#' \code{\link[imputeTS]{ggplot_na_gapsize2}},
#' \code{\link[imputeTS]{ggplot_na_imputations}}
#'
#' @examples
#' # Example 1: Visualize the missing values in x
#' x <- stats::ts(c(1:11, 4:9, NA, NA, NA, 11:15, 7:15, 15:6, NA, NA, 2:5, 3:7))
#' ggplot_na_distribution(x)
#'
#' # Example 2: Visualize the missing values in tsAirgap time series
#' ggplot_na_distribution(tsAirgap)
#'
#' # Example 3: Same as example 1, just written with pipe operator
#' x <- ts(c(1:11, 4:9, NA, NA, NA, 11:15, 7:15, 15:6, NA, NA, 2:5, 3:7))
#' x %>% ggplot_na_distribution()
#'
#' # Example 4: Visualize NAs in tsAirgap - different color for points
#' # Plot adjustments via ggplot_na_distribution function parameters
#' ggplot_na_distribution(tsAirgap, color_points = "grey")
#'
#' # Example 5: Visualize NAs in tsAirgap - different theme
#' # Plot adjustments via ggplot_na_distribution function parameters
#' ggplot_na_distribution(tsAirgap, theme = ggplot2::theme_classic())
#'
#' # Example 6: Visualize NAs in tsAirgap - title, subtitle in center
#' # Plot adjustments via ggplot2 syntax
#' ggplot_na_distribution(tsAirgap) +
#' ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5)) +
#' ggplot2::theme(plot.subtitle = ggplot2::element_text(hjust = 0.5))
#'
#' # Example 7: Visualize NAs in tsAirgap - title in center, no subtitle
#' # Plot adjustments via ggplot2 syntax and function parameters
#' ggplot_na_distribution(tsAirgap, subtitle = NULL) +
#' ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5))
#'
#' # Example 8: Visualize NAs in tsAirgap - x-axis texts with angle
#' # Plot adjustments via ggplot2 syntax and function parameters
#' ggplot_na_distribution(tsAirgap, color_points = "grey") +
#' ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 60, hjust = 1))
#'
#' @importFrom ggplot2 theme_linedraw ggplot geom_point aes geom_line geom_bar ggtitle
#' xlab ylab theme element_text theme_classic
#'
#' @importFrom stats ts
#'
#' @importFrom magrittr %>%
#'
#' @export
ggplot_na_distribution <- function(x,
                                   x_axis_labels = NULL,
                                   color_points = "steelblue",
                                   color_lines = "steelblue2",
                                   color_missing = "indianred",
                                   color_missing_border = "indianred",
                                   alpha_missing = 0.5,
                                   title = "Distribution of Missing Values",
                                   subtitle = "Time Series with highlighted missing regions",
                                   xlab = "Time",
                                   ylab = "Value",
                                   shape_points = 20,
                                   size_points = 2.5,
                                   theme = ggplot2::theme_linedraw()) {
  # Overview: validates and flattens `x` to a plain numeric vector, builds the
  # data.frame used for plotting, draws the series as line + points and overlays
  # one bar per NA position. Returns the resulting ggplot2 object.
  data <- x

  ##
  ## 1. Input Check and Transformation
  ##

  # 1.1 special handling data types
  # For tbl_ts input the second column is used, for other tbl input the first.
  if (inherits(data, "tbl_ts")) {
    data <- as.vector(as.data.frame(data)[, 2])
  } else if (inherits(data, "tbl")) {
    data <- as.vector(as.data.frame(data)[, 1])
  }

  # 1.2 Check if the input is multivariate
  if (!is.null(dim(data)[2]) && dim(data)[2] > 1) {
    stop("x is not univariate. The function only works with univariate
input for x. For data types with multiple variables/columns only input
the column you want to plot as parameter x.")
  }

  # 1.3 Checks and corrections for wrong data dimension
  # Altering multivariate objects with 1 column (which are essentially
  # univariate) to be dim = NULL
  if (!is.null(dim(data)[2])) {
    data <- data[, 1]
  }

  # 1.4 Input as vector
  data <- as.vector(data)

  # 1.5 Check if input is numeric
  if (!is.numeric(data)) {
    stop("Input x is not numeric")
  }

  # 1.6 Check preconditions about amount of NAs
  # exclude NA only inputs
  missindx <- is.na(data)
  if (all(missindx)) {
    stop("Input data consists only of NAs. At least one non-NA numeric value is needed
for creating a meaningful ggplot_na_distribution plot)")
  }

  ##
  ## End Input Check and Transformation
  ##


  ##
  ## 2. Preparations
  ##

  # 2.1 Create required data
  # Get NA positions (reuses the mask computed in 1.6)
  id_na <- which(missindx)

  # 2.2 Create dataframe for ggplot2
  # Define x-axis label data
  # if Date or POSIXct given for x_axis_labels time information can be plotted.
  # The bar width is 90% of the distance between the first two time stamps.
  if (inherits(x_axis_labels, "Date")) {
    time <- x_axis_labels
    width_na_bar <- as.numeric(time[2] - time[1]) * 0.9
  } else if (inherits(x_axis_labels, "POSIXct")) {
    time <- x_axis_labels
    width_na_bar <- as.numeric(difftime(time[2], time[1], units = "secs")) * 0.9
  } else if (is.null(x_axis_labels)) {
    time <- seq_along(data)
    width_na_bar <- as.numeric(time[2] - time[1]) * 0.9
  } else {
    stop("Input for x_axis_labels is not in a supported format, must a
vector of Date or a POSIXct objects with the same length as x")
  }

  # Labels and observations must line up one-to-one. Without this check
  # data.frame() would either fail with a cryptic message or silently recycle
  # the labels, which yields a plot with wrong time stamps.
  if (length(time) != length(data)) {
    stop(paste0(
      "Input for x_axis_labels has length ", length(time),
      ", but x has length ", length(data),
      ". Both need to have the same length."
    ))
  }

  # Create the remainder of the data.frame for ggplot2
  value <- data
  df <- data.frame(time, value)

  ##
  ## End Preparations
  ##


  ##
  ## 3. Create the ggplot2 plot
  ##

  # Create the plot
  gg <- ggplot2::ggplot() +

    # Adding the Line + Parameters
    ggplot2::geom_line(
      data = df, na.rm = TRUE,
      ggplot2::aes(x = time, y = value), col = color_lines
    ) +

    # Adding the Points + Parameters
    ggplot2::geom_point(
      data = df, na.rm = TRUE,
      ggplot2::aes(x = time, y = value), shape = shape_points,
      col = color_points, size = size_points
    ) +

    # Adding additional modifications like title, subtitle, theme,...
    ggplot2::ggtitle(label = title, subtitle = subtitle) +
    ggplot2::xlab(xlab) +
    ggplot2::ylab(ylab) +
    theme

  # Add the red background bars for missing data areas
  if (length(id_na) > 0) {
    # Red Bars only if missing data in time series.
    # Bar height is 5% above the largest observed value.
    na_val <- max(df$value * 1.05, na.rm = TRUE)

    gg <- gg +
      ggplot2::geom_bar(
        data = df[is.na(df$value), ], stat = "identity",
        ggplot2::aes(x = time, y = na_val),
        col = color_missing_border, fill = color_missing,
        alpha = alpha_missing, width = width_na_bar
      )
  }

  ##
  ## End creating the ggplot2 plot
  ##

  return(gg)
}
================================================
FILE: R/ggplot_na_distribution2.R
================================================
#' @title Stacked Bar Plot to Visualize Missing Values per Time Interval
#'
#' @description Visualization of missing values in barplot form.
#' Especially useful when looking at specific intervals and for
#' time series with a lot of observations.
#'
#' @param x Numeric Vector (\code{\link[base]{vector}}) or Time Series
#' (\code{\link[stats]{ts}}) object containing NAs. This is the only mandatory
#' parameter - all other parameters are only needed for adjusting the plot appearance.
#'
#' @param number_intervals Defines the number of bins to be created. Default
#' number of intervals (denoted by NULL) is calculated by \code{\link[grDevices]{nclass.Sturges}}
#' using Sturges' formula. If the interval_size parameter is set to a value
#' different to NULL this parameter is ignored.
#'
#' @param interval_size Defines how many observations should be in one bin/interval.
#' The required number of overall bins is afterwards calculated automatically.
#' If used this parameter overwrites the number_intervals parameter.
#' For a very long time series be sure to make the interval_size not extremely
#' small, otherwise because of overplotting issues nothing can be seen until
#' you also increase the plot width.
#'
#' @param measure Whether the NA / non-NA ratio should be given as
#' percent or absolute numbers.
#'
#' \itemize{
#' \item{"percent" - for percentages}
#'
#' \item{"count" - for absolute numbers of NAs}
#' }
#'
#' @param color_missing Color for the amount of missing values.
#'
#' @param color_existing Color for the amount of existing values.
#'
#' @param alpha_missing Alpha (transparency) value for the missing values.
#'
#' @param alpha_existing Alpha (transparency) value for the existing values.
#'
#' @param title Title of the Plot (NULL for deactivating title).
#'
#' @param subtitle Subtitle of the Plot (NULL for deactivating subtitle).
#'
#' @param xlab Label for x-Axis. Automatically set to the current interval size, if
#' no custom text is chosen.
#'
#' @param ylab Label for y-Axis. As default (NULL), the axis is automatically set
#' to either 'Percent' or 'Count' dependent on the settings of parameter \code{measure}.
#'
#' @param color_border Color for the small borders between the intervals/bins.
#' Default is 'white'.
#'
#' @param theme Set a Theme for ggplot2. Default is ggplot2::theme_linedraw().
#' (\code{\link[ggplot2]{theme_linedraw}})
#'
#' @details This function visualizes the distribution of missing values within
#' a time series. In comparison to the \code{\link[imputeTS]{ggplot_na_distribution}}
#' function this is not done by plotting each observation of the time series
#' separately. Instead observations for time intervals are represented as
#' intervals/bins of multiple values. For these intervals information about
#' the amount of missing values is shown. This has the advantage, that also
#' for large time series a plot which is easy to overview can be created.
#'
#' The only really needed parameter for this function is x (the univariate
#' time series that shall be visualized). All other parameters are solely
#' for altering the appearance of the plot.
#'
#' As long as the input is univariate and numeric the function also takes
#' data.frame, tibble, tsibble, zoo, xts as an input.
#'
#' The plot can be adjusted to your needs via the function parameters.
#' Additionally, for more complex adjustments, the output can also be
#' adjusted via ggplot2 syntax. This is possible, since the output
#' of the function is a ggplot2 object. Also take a look at the Examples
#' to see how adjustments are made.
#'
#' @author Steffen Moritz, Sebastian Gatscha
#'
#' @seealso \code{\link[imputeTS]{ggplot_na_distribution}},
#' \code{\link[imputeTS]{ggplot_na_gapsize}},
#' \code{\link[imputeTS]{ggplot_na_gapsize2}},
#' \code{\link[imputeTS]{ggplot_na_imputations}}
#'
#' @examples
#' # Example 1: Visualize the missing values in tsNH4 time series as percentages
#' ggplot_na_distribution2(tsNH4)
#'
#' # Example 2: Visualize the missing values in tsNH4 time series as counts
#' ggplot_na_distribution2(tsNH4, measure = "count")
#'
#' # Example 3: Visualize the missing values in tsHeating time series
#' ggplot_na_distribution2(tsHeating)
#'
#' # Example 4: Same as example 1, just written with pipe operator
#' tsNH4 %>% ggplot_na_distribution2()
#'
#' # Example 5: Visualize NAs in tsNH4 - exactly 8 intervals
#' ggplot_na_distribution2(tsNH4, number_intervals = 8)
#'
#' # Example 6: Visualize NAs in tsNH4 - 300 observations per interval
#' ggplot_na_distribution2(tsNH4, interval_size = 300)
#'
#' # Example 7: Visualize NAs in tsAirgap - different color for NAs
#' # Plot adjustments via ggplot_na_distribution2 function parameters
#' ggplot_na_distribution2(tsAirgap, color_missing = "pink")
#'
#' # Example 8: Visualize NAs in tsNH4 - different theme
#' # Plot adjustments via ggplot_na_distribution2 function parameters
#' ggplot_na_distribution2(tsNH4, theme = ggplot2::theme_classic())
#'
#' # Example 9: Visualize NAs in tsAirgap - title, subtitle in center
#' # Plot adjustments via ggplot2 syntax
#' ggplot_na_distribution2(tsAirgap) +
#' ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5)) +
#' ggplot2::theme(plot.subtitle = ggtext::element_markdown(hjust = 0.5))
#'
#' # Example 10: Visualize NAs in tsAirgap - title in center, no subtitle
#' # Plot adjustments via ggplot2 syntax and function parameters
#' ggplot_na_distribution2(tsAirgap, subtitle = NULL) +
#' ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5))
#'
#' # Example 11: Visualize NAs in tsAirgap - x-axis texts with angle
#' # Plot adjustments via ggplot2 syntax and function parameters
#' ggplot_na_distribution2(tsAirgap, color_missing = "grey") +
#' ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 60, hjust = 1))
#'
#' @importFrom magrittr %>%
#'
#' @importFrom grDevices nclass.Sturges
#'
#' @importFrom ggplot2 theme_linedraw alpha ggplot aes scale_fill_manual
#' theme element_blank scale_x_continuous scale_y_continuous
#' labs xlab ylab stat_bin after_stat theme_classic
#'
#' @importFrom ggtext element_markdown
#'
#' @export
ggplot_na_distribution2 <- function(x,
                                    number_intervals = NULL,
                                    interval_size = NULL,
                                    measure = "percent",
                                    color_missing = "indianred2",
                                    color_existing = "steelblue",
                                    alpha_missing = 0.8,
                                    alpha_existing = 0.3,
                                    title = "Missing Values per Interval",
                                    subtitle = "Amount of NA and non-NA for successive intervals",
                                    xlab = "Time Lapse (Interval Size: XX)",
                                    ylab = NULL,
                                    color_border = "white",
                                    theme = ggplot2::theme_linedraw()) {
  # Builds a stacked bar chart (one bar per interval of successive
  # observations) showing how many values in each interval are NA / non-NA.
  # Returns the ggplot object so callers can extend it with ggplot2 syntax.
  data <- x

  ##
  ## 1. Input Check and Transformation
  ##

  # 1.1 special handling data types
  # For class "tbl_ts" the second column is used, for class "tbl" the first
  if (inherits(data, "tbl_ts")) {
    data <- as.vector(as.data.frame(data)[, 2])
  } else if (inherits(data, "tbl")) {
    data <- as.vector(as.data.frame(data)[, 1])
  }

  # 1.2 Check if the input is multivariate
  if (!is.null(dim(data)[2]) && dim(data)[2] > 1) {
    stop("x is not univariate. The function only works with univariate
input for x. For data types with multiple variables/columns only input
the column you want to plot as parameter x.")
  }

  # 1.3 Checks and corrections for wrong data dimension
  # Altering multivariate objects with 1 column (which are essentially
  # univariate) to be dim = NULL
  if (!is.null(dim(data)[2])) {
    data <- data[, 1]
  }

  # 1.4 Input as vector
  data <- as.vector(data)

  # 1.5 Check if input is numeric
  if (!is.numeric(data)) {
    stop("Input x is not numeric")
  }

  # 1.6 Check preconditions about amount of NAs
  # exclude NA only inputs
  if (all(is.na(data))) {
    stop("Input data consists only of NAs. At least one non-NA numeric value is needed
for creating a meaningful ggplot_na_distribution2 plot)")
  }

  ##
  ## End Input Check and Transformation
  ##

  ##
  ## 2. Preparations
  ##

  n_obs <- length(data)

  # 2.1 Calculation default number of intervals
  if (is.null(number_intervals)) {
    number_intervals <- grDevices::nclass.Sturges(data)
  }

  # 2.2 Calculation break points
  # interval_size (if given) takes precedence over number_intervals.
  if (!is.null(interval_size)) {
    step <- interval_size
  } else {
    # Never let the step drop to 0: a number_intervals larger than the
    # series length would otherwise make seq() fail with by = 0.
    step <- max(1, floor(n_obs / number_intervals))
  }
  # The series length is always appended as the final break, so the last
  # interval may be shorter than the others.
  breaks <- c(seq(from = 0, to = n_obs - 1, by = step), n_obs)

  # Width of the first interval; used for the x-axis label and as divisor
  # for the "percent" measure.
  binwidth <- breaks[2]

  # 2.3 Process parameter settings
  # Add alpha values to colors
  color_missing <- ggplot2::alpha(color_missing, alpha_missing)
  color_existing <- ggplot2::alpha(color_existing, alpha_existing)

  # Set subtitle to default
  # (needed because .Rd usage section gives error when using defaults > 90 chars )
  # identical() is FALSE for NULL, so a deactivated subtitle stays NULL.
  if (identical(subtitle, "Amount of NA and non-NA for successive intervals")) {
    subtitle <- paste0("Amount of <b style='color:", color_missing, ";' >NA</b>
and <b style='color:", color_existing, "' >non-NA</b>
for successive intervals")
  }

  # Set ylab according to chosen measure
  if (is.null(ylab)) {
    ylab <- if (measure == "percent") "Percent" else "Count"
  }

  # Set xlab according to chosen parameters
  # identical() keeps xlab = NULL usable instead of failing in if().
  if (identical(xlab, "Time Lapse (Interval Size: XX)")) {
    xlab <- paste("Time Lapse (Interval Size:", binwidth, ")")
  }

  # 2.4 Create dataframe for ggplot2
  index <- seq_along(data)
  # Factor levels are "FALSE" (value present) and "TRUE" (value missing)
  miss <- as.factor(is.na(data))
  df <- data.frame(index, miss)

  ##
  ## End Preparations
  ##

  ##
  ## 3. Create the ggplot2 plot
  ##

  # Create the ggplot2 plot
  gg <- ggplot2::ggplot(df, ggplot2::aes(index, fill = miss)) +
    ggplot2::scale_fill_manual(
      # Order follows the factor levels: FALSE (existing), TRUE (missing)
      values = c(color_existing, color_missing),
      labels = c("non-NAs", "NAs")
    ) +
    theme +
    ggplot2::theme(
      legend.position = "none",
      legend.title = ggplot2::element_blank(),
      plot.subtitle = ggtext::element_markdown(),
      panel.grid.major = ggplot2::element_blank(),
      panel.grid.minor.x = ggplot2::element_blank()
    ) +
    ggplot2::scale_x_continuous(expand = c(0, 0)) +
    ggplot2::labs(title = title, subtitle = subtitle) +
    ggplot2::xlab(xlab) +
    ggplot2::ylab(ylab)

  # Dummy binding for the variable that is only used inside after_stat()
  count <- NULL

  if (measure == "percent") {
    # Counts are divided by the width of the first interval, so a shorter
    # trailing interval does not reach 100%.
    gg <- gg +
      ggplot2::stat_bin(ggplot2::aes(y = ggplot2::after_stat(count / binwidth)),
        col = color_border, breaks = breaks, closed = "right"
      ) +
      ggplot2::scale_y_continuous(expand = c(0, 0), labels = function(x) paste0(x*100, "%"))
  } else {
    # Any measure other than "percent" is plotted as absolute counts
    gg <- gg +
      ggplot2::stat_bin(ggplot2::aes(y = ggplot2::after_stat(count)),
        col = color_border, breaks = breaks, closed = "right"
      ) +
      ggplot2::scale_y_continuous(expand = c(0, 0))
  }

  return(gg)
}
================================================
FILE: R/ggplot_na_gapsize.R
================================================
#' @title Bar Plot to Visualize Occurrences of Different NA Gap Sizes
#'
#' @description Visualize the Number of Occurrences for existing NA Gap Sizes
#' (NAs in a row) in a Time Series
#'
#' @param x Numeric Vector (\code{\link[base]{vector}}) or Time Series
#' (\code{\link[stats]{ts}}) object containing NAs. This is the only mandatory
#' parameter - all other parameters are only needed for adjusting the plot appearance.
#'
#' @param limit Specifies how many of the most common gap sizes are shown in
#' the plot. Default is 10. So only the 10 most often occurring gap sizes will
#' be shown. If more or all present gap sizes should be displayed, the limit needs
#' to be increased. Since this might add a lot of additional data, having
#' parameter \code{orientation} set to 'horizontal' avoids overlaps in the axis
#' labels.
#'
#' @param include_total When set to TRUE the total NA count for a gapsize is
#' included in the plot (total = number occurrence x gap size).
#' E.g. if a gapsize of 3 occurs 10 times, this means this gap size makes
#' up for 30 NAs in total. This can be a good indicator of the
#' overall impact of a gapsize.
#'
#' @param ranked_by Should the results be sorted according to the number of
#' occurrence or total resulting NAs for a gapsize. Total resulting NAs
#' are calculated by (total = number occurrence x gap size).
#' \itemize{
#' \item{"occurrence" - Sorting by 'number of occurrence' of a gap size}
#'
#' \item{"total" - Sorting by 'total resulting NAs' of a gap size}
#' }
#'
#' The default setting is "occurrence".
#'
#' @param color_occurrence Defines the Color for the Bars of
#' 'number of occurrence'.
#'
#' @param color_total Defines the color for the bars of
#' 'total resulting NAs'.
#'
#' @param color_border Defines the color for the border of the bars.
#'
#' @param alpha_bars Alpha (transparency) value used for filling the bars.
#'
#' @param title Title of the Plot.
#'
#' @param subtitle Subtitle of the Plot.
#'
#' @param xlab Label for x-Axis.
#'
#' @param ylab Label for y-Axis.
#'
#' @param legend If TRUE a legend is added at the bottom.
#'
#' @param orientation Can be either 'vertical' or 'horizontal'. Defines
#' if the bars are plotted vertically or horizontally. For large amounts
#' of different gap sizes horizontal illustration is favorable (also see
#' parameter \code{limit}).
#'
#' @param label_occurrence Defines the label assigned to 'number of occurrence'
#' in the legend.
#' @param label_total Defines the label assigned to 'total resulting NAs'
#' in the legend.
#'
#' @param theme Set a Theme for ggplot2. Default is ggplot2::theme_linedraw().
#' (\code{\link[ggplot2]{theme_linedraw}})
#'
#' @author Steffen Moritz, Sebastian Gatscha
#'
#' @return The output is a \code{\link[ggplot2]{ggplot2}} object that can be
#' further adjusted by using the ggplot syntax
#'
#' @details This plotting function can be used to visualize the length of
#' the NA gaps (NAs in a row) in a time series. It shows a ranking of which
#' gap sizes occur most often. This ranking can be ordered by the number
#' occurrence of the gap sizes or by total resulting NAs for this gap size
#' (occurrence * gap length). A NA-gap of 3 occurring 10 times means 30 total
#' resulting NAs.
#'
#' A resulting plot can for example be described like this:
#' a 2 NA-gap (2 NAs in a row) occurred 27 times,
#' a 9 NA-gap (9 NAs in a row) occurred 11 times,
#' a 27 NA-gap (27 NAs in a row) occurred 1 times, ...
#'
#' The only really needed parameter for this function is x (the univariate
#' time series with NAs that shall be visualized). All other parameters
#' are solely for altering the appearance of the plot.
#'
#' As long as the input is univariate and numeric, the function also takes
#' data.frame, tibble, tsibble, zoo, xts as an input.
#'
#' The plot can be adjusted to your needs via the function parameters.
#' Additionally, for more complex adjustments, the output can also be
#' adjusted via ggplot2 syntax. This is possible, since the output
#' of the function is a ggplot2 object. Also take a look at the Examples
#' to see how adjustments are made.
#'
#' @seealso \code{\link[imputeTS]{ggplot_na_gapsize2}},
#' \code{\link[imputeTS]{ggplot_na_distribution}},
#' \code{\link[imputeTS]{ggplot_na_distribution2}},
#' \code{\link[imputeTS]{ggplot_na_imputations}}
#'
#' @examples
#' # Example 1: Visualize the top gap sizes in tsNH4 (top 10 by default)
#' ggplot_na_gapsize(tsNH4)
#'
#' # Example 2: Visualize the top gap sizes in tsAirgap - vertical bars
#' ggplot_na_gapsize(tsAirgap, orientation = "vertical")
#'
#' # Example 3: Same as example 1, just written with pipe operator
#' tsNH4 %>% ggplot_na_gapsize()
#'
#' # Example 4: Visualize the top 20 gap sizes in tsNH4
#' ggplot_na_gapsize(tsNH4, limit = 20)
#'
#' # Example 5: Visualize top gap sizes in tsNH4 without showing total NAs
#' ggplot_na_gapsize(tsNH4, limit = 20, include_total = FALSE)
#'
#' # Example 6: Visualize top gap sizes in tsNH4 but ordered by total NAs
#' # (total = occurrence * gap length)
#' ggplot_na_gapsize(tsNH4, limit = 20, ranked_by = "total")
#'
#' # Example 7: Visualize top gap sizes in tsNH4 - different theme
#' # Plot adjustments via ggplot_na_gapsize function parameters
#' ggplot_na_gapsize(tsNH4, theme = ggplot2::theme_classic())
#'
#' # Example 8: Visualize top gap sizes in tsNH4 - title, subtitle in center
#' # Plot adjustments via ggplot2 syntax
#' ggplot_na_gapsize(tsNH4) +
#' ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5)) +
#' ggplot2::theme(plot.subtitle = ggplot2::element_text(hjust = 0.5))
#'
#' # Example 9: Visualize top gap sizes in tsNH4 - title in center, no subtitle
#' # Plot adjustments via ggplot2 syntax and function parameters
#' ggplot_na_gapsize(tsNH4, subtitle = NULL) +
#' ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5))
#'
#' # Example 10: Top gap sizes in tsNH4 - legend on the right and color change
#' # Plot adjustments via ggplot2 syntax and function parameters
#' ggplot_na_gapsize(tsNH4, color_total = "grey") +
#' ggplot2::theme(legend.position = "right")
#' @importFrom magrittr %>%
#'
#' @importFrom ggplot2 theme_linedraw ggplot geom_bar position_dodge aes scale_x_discrete
#' scale_fill_manual ggtitle xlab ylab theme element_text element_blank
#' coord_flip theme_classic
#'
#' @export
ggplot_na_gapsize <- function(x,
                              limit = 10,
                              include_total = TRUE,
                              ranked_by = "occurrence",
                              color_occurrence = "indianred",
                              color_total = "steelblue",
                              color_border = "black",
                              alpha_bars = 1,
                              title = "Occurrence of gap sizes",
                              subtitle = "Gap sizes (NAs in a row) ordered by most common",
                              xlab = NULL,
                              ylab = "Number occurrence",
                              legend = TRUE,
                              orientation = "horizontal",
                              label_occurrence = "Number occurrence gapsize",
                              label_total = "Resulting NAs for gapsize",
                              theme = ggplot2::theme_linedraw()) {
  # Builds a bar chart ranking the NA gap sizes (runs of consecutive NAs)
  # of a series by how often they occur or by the total NAs they cause.
  # Returns the ggplot object so callers can extend it with ggplot2 syntax.
  data <- x

  ##
  ## 1. Input Check and Transformation
  ##

  # 1.1 special handling data types
  # For class "tbl_ts" the second column is used, for class "tbl" the first
  if (inherits(data, "tbl_ts")) {
    data <- as.vector(as.data.frame(data)[, 2])
  } else if (inherits(data, "tbl")) {
    data <- as.vector(as.data.frame(data)[, 1])
  }

  # 1.2 Check if the input is multivariate
  if (!is.null(dim(data)[2]) && dim(data)[2] > 1) {
    stop("x is not univariate. The function only works with univariate
input for x. For data types with multiple variables/columns only input
the column you want to plot as parameter x.")
  }

  # 1.3 Checks and corrections for wrong data dimension
  # Altering multivariate objects with 1 column (which are essentially
  # univariate) to be dim = NULL
  if (!is.null(dim(data)[2])) {
    data <- data[, 1]
  }

  # 1.4 Input as vector
  data <- as.vector(data)

  # 1.5 Check if input is numeric
  if (!is.numeric(data)) {
    stop("Input x is not numeric")
  }

  # 1.6 Check preconditions about amount of NAs
  # exclude NA only inputs
  if (all(is.na(data))) {
    stop("Input data consists only of NAs. At least one non-NA numeric value is needed
for creating a meaningful ggplot_na_gapsize plot)")
  }

  # exclude inputs without NAs
  if (!anyNA(data)) {
    stop("Input data contains no NAs. At least one missing value is needed
to create a meaningful ggplot_na_gapsize plot)")
  }

  # 1.7 Check the limit parameter
  # A limit below 1 would turn the index range used for truncation into a
  # descending / out-of-range sequence and produce NA bars.
  # Inf stays valid and simply disables the truncation.
  if (!is.numeric(limit) || length(limit) != 1 || is.na(limit) || limit < 1) {
    stop("Wrong input for parameter limit. Input must be a single number >= 1.")
  }

  ##
  ## End Input Check and Transformation
  ##

  ##
  ## 2. Preparations
  ##

  # 2.1 Create required data
  # Calculation consecutive NA information
  rle_na <- base::rle(is.na(data))
  # Lengths of the runs that consist of NAs
  vec <- rle_na$lengths[rle_na$values]
  # Number of occurrences per gap size (names are the gap sizes)
  occurrence_bar <- table(vec)
  gaps_vec <- as.integer(names(occurrence_bar))
  # Total NAs per gap size = occurrences * gap size
  totals_bar <- occurrence_bar * gaps_vec
  labels1 <- paste0(gaps_vec, " NA-gap")

  # 2.2 Adjust to parameter selection by user
  # Sorting for ranked_by param (ascending, so the top entries come last)
  if (ranked_by == "occurrence") {
    # sort according to occurrence of gapsizes
    fooind <- order(occurrence_bar)
  } else if (ranked_by == "total") {
    # sort according to total NAs
    fooind <- order(totals_bar)
  } else {
    stop("Wrong input for parameter ranked_by. Input must be either 'occurrence' or 'total'.
Call ?ggplot_na_gapsize to view the documentation.")
  }
  occurrence_bar <- occurrence_bar[fooind]
  totals_bar <- totals_bar[fooind]
  labels1 <- labels1[fooind]

  # Adjust to show only a limited amount of bars for limit param
  n_bars <- length(occurrence_bar)
  if (n_bars > limit) {
    # Keep the last (= highest ranked) entries; floor() keeps the index
    # range on whole numbers for a fractional limit.
    keep <- (n_bars - floor(limit) + 1):n_bars
    occurrence_bar <- occurrence_bar[keep]
    totals_bar <- totals_bar[keep]
    labels1 <- labels1[keep]
  }

  # 2.3 Create dataframe for ggplot2
  # data.frame for ggplot (long format: occurrence rows first, then totals;
  # id is recycled so both bar types of a gap size share one x position)
  id <- seq_along(occurrence_bar)
  val <- c(occurrence_bar, totals_bar)
  label <- c(
    rep("occurrence_bar", length(occurrence_bar)),
    rep("totals_bar", length(totals_bar))
  )
  df <- data.frame(id, val, label)

  # Only number of occurrences bar
  if (!include_total) {
    df <- df[df$label == "occurrence_bar", ]
  }

  ##
  ## End Preparations
  ##

  ##
  ## 3. Create the ggplot2 plot
  ##

  # Create ggplot
  gg <- ggplot2::ggplot(data = df) +
    ggplot2::geom_bar(ggplot2::aes(x = id, y = val, fill = label),
      color = color_border,
      width = 0.6,
      alpha = alpha_bars,
      stat = "identity", position = ggplot2::position_dodge(width = 0.7)
    ) +
    ggplot2::scale_x_discrete(
      labels = labels1,
      limits = labels1
    ) +
    ggplot2::scale_fill_manual(
      values = c(color_occurrence, color_total),
      labels = c(label_occurrence, label_total)
    ) +
    ggplot2::ggtitle(title, subtitle = subtitle) +
    ggplot2::xlab(xlab) +
    ggplot2::ylab(ylab) +
    theme +
    ggplot2::theme(
      legend.position = "bottom",
      axis.text.x = ggplot2::element_text(angle = 30, hjust = 1),
      legend.title = ggplot2::element_blank()
    )

  # For flipping from vertical to horizontal bars
  # (any other value of orientation leaves the bars vertical)
  if (orientation == "horizontal") {
    gg <- gg + ggplot2::coord_flip()
  }

  # Removing legend
  if (!legend) {
    gg <- gg + ggplot2::theme(legend.position = "none")
  }

  ##
  ## End creating the ggplot2 plot
  ##

  return(gg)
}
================================================
FILE: R/ggplot_na_gapsize2.R
================================================
#' @title Bubble Plot to Visualize Total NA Count of NA gap sizes
#'
#' @description Visualize the total NA count (gap size * occurrence) for
#' the existing gaps sizes (NAs in a row).
#'
#' @param x Numeric Vector (\code{\link[base]{vector}}) or Time Series
#' (\code{\link[stats]{ts}}) object containing NAs. This is the only
#' mandatory parameter - all other parameters are only needed for adjusting
#' the plot appearance.
#'
#' @param colors_bubbles Choose a color gradient that encodes lower to
#' higher total NA counts.
#' Color codes can be given as vector. Using color palettes from colorspace,
#' grDevices, RColorBrewer or other packages is useful here.
#' E.g. grDevices::heat.colors(10) would be a possible input.
#'
#' @param color_border Color for the border of the bubbles.
#'
#' @param alpha_bubbles Alpha (transparency) value used for filling the bubbles.
#'
#' @param labels_bubbles Should labels be added to the individual bubbles inside
#' the plot.
#' For many datasets there will be overplotting issues once labels are added.
#' In these cases using the min_gapsize, min_totals or min_occurrence options
#' might be useful to only display the most relevant gap sizes.
#'
#' You can choose between these labels to be added:
#' \itemize{
#' \item{"none" - No label gets added to the bubbles}
#' (default choice)
#'
#' \item{"gap" - Adds a label displaying the gap size belonging to the
#' respective bubble}
#'
#' \item{"total" - Adds a label displaying the total NA count for the
#' respective bubble}
#'
#' \item{"gap-occurrence" - Adds a label displaying the respective
#' gap size and number of its occurrence}
#' }
#'
#' The default setting is "none".
#'
#' @param size_bubbles Allows to scale the size of the bubbles.
#' Some experimenting with this parameter might be needed to get
#' a good visualization for your specific dataset.
#'
#' @param min_totals Only print bubbles for gap sizes that account
#' for at least min_totals NAs in the time series.
#'
#' @param min_occurrence Only print bubbles for gap sizes that occur at least
#' min_occurrence times in the time series.
#'
#' @param min_gapsize Only show gap sizes of at least min_gapsize
#' (gap size >= min_gapsize). Together with max_gapsize enables zooming into
#' certain regions of interest.
#'
#' @param max_gapsize Only show gap sizes of at most max_gapsize
#' (gap size <= max_gapsize). Together with min_gapsize enables zooming into
#' certain regions of interest.
#'
#' @param title Title of the Plot.
#'
#' @param subtitle Subtitle of the Plot.
#'
#' @param xlab Label for x-Axis.
#'
#' @param ylab Label for y-Axis.
#'
#' @param legend If TRUE a legend is added on the right side
#'
#' @param legend_breaks Number of displayed breaks / labels in the legend.
#' Needs an integer giving the desired number of breaks as input. Breakpoints are
#' internally calculated by R's pretty() function, which can also lead to
#' values slightly smaller or larger than the desired number.
#'
#'
#' @param legend_title Defines the title of the legend.
#'
#' @param legend_position Defines position of the legend. Choose either
#' 'bottom', 'right', 'left' or 'top'.
#'
#' @param legend_point_sizes Defines the size of the symbols representing the total
#' NA bubbles in the legend.
#'
#' You can choose between "default", "actual" or a custom vector of sizes.
#'
#' \itemize{
#' \item{"default" - Scales the points in the legend to symbolically
#' resemble the size differences} (default choice)
#'
#' \item{"actual" - Scales the points in the legend according
#' to their actual size in the plot}
#' }
#'
#' Since these two options are not always sufficient, a custom vector of
#' sizes can be used as input. This would look like this: c(4,5,6,7). Be
#' aware, that the length of this vector must match the number of breakpoints
#' (can be adjusted with legend_breaks).
#'
#' @param theme Set a theme for ggplot2. Default is ggplot2::theme_linedraw().
#' (\code{\link[ggplot2]{theme_linedraw}})
#'
#' @author Steffen Moritz
#'
#' @return The output is a \code{\link[ggplot2]{ggplot2}} object that can be
#' further adjusted by using the ggplot syntax
#'
#' @details This function visualizes total NA counts by individual gap size
#' (consecutive NAs) in a time series. The bubble plot makes it easy to see
#' which gap sizes account for most of the NAs in the series. The size and
#' color of the bubbles represent the total number of NAs a given gap size
#' accounts for.
#'
#' Total NAs for a gap size are calculated as follows:
#' total NAs = occurrence * gap length
#'
#' For example, interpret a bubble for gap size 2 as follows:
#' a 2-NA gap (two NAs in a row) occurred 27 times in the time series and thus
#' accounts for 54 total NAs.
#'
#' On the x-axis, the different gap sizes are plotted in increasing order.
#' The y-axis shows the occurrence count of these gap sizes in the time series.
#'
#' The plot is useful for investigating possible root causes of the missing
#' data. It can indicate whether the missing data are random or whether there
#' are patterns of interest.
#'
#' Depending on the input time series, there might be too much information in
#' the plot, leading to overplotting. In these cases, use the parameters
#' \code{min_totals}, \code{min_occurrence}, and \code{min_gapsize} to display
#' only the information of interest.
#'
#' The only required parameter is \code{x} (the univariate time series with NAs
#' to visualize). All other parameters alter the appearance of the plot.
#'
#' As long as the input is univariate and numeric, the function also accepts
#' \code{data.frame}, \code{tibble}, \code{tsibble}, \code{zoo}, or \code{xts}
#' input.
#'
#' The plot can be adjusted via function parameters. For more complex
#' adjustments, you can modify the result using ggplot2 syntax, since the
#' function returns a ggplot2 object. See the Examples for typical adjustments.
#'
#'
#' @seealso \code{\link[imputeTS]{ggplot_na_distribution}},
#' \code{\link[imputeTS]{ggplot_na_distribution2}},
#' \code{\link[imputeTS]{ggplot_na_gapsize}},
#' \code{\link[imputeTS]{ggplot_na_imputations}}
#'
#' @examples
#' # Example 1: Visualize total NA counts in tsNH4
#' ggplot_na_gapsize2(tsNH4)
#'
#' # Example 2: Visualize total NA counts in tsNH4, different color gradient
#' ggplot_na_gapsize2(tsNH4, colors_bubbles = rev(grDevices::heat.colors(10)))
#'
#' # Example 3: Same as example 1, just written with pipe operator
#' tsNH4 %>% ggplot_na_gapsize2()
#'
#' # Example 4: Visualize total NA counts in tsHeating
#' # Limited to gap sizes that account for a total of > 600 NAs
#' ggplot_na_gapsize2(tsHeating, min_totals = 600)
#'
#' # Example 5: Visualize total NA counts in tsNH4 - no legend
#' ggplot_na_gapsize2(tsNH4, legend = FALSE)
#'
#' # Example 6: Visualize total NA counts in tsAirgap - increased bubble size
#' ggplot_na_gapsize2(tsAirgap, size_bubbles = 35)
#'
#' # Example 7: Visualize total NA counts in tsNH4
#' # Plot adjustments via ggplot_na_gapsize2 function parameters
#' ggplot_na_gapsize2(tsNH4, theme = ggplot2::theme_classic())
#'
#' # Example 8: Visualize total NA counts in tsNH4 - title, subtitle in center
#' # Plot adjustments via ggplot2 syntax
#' ggplot_na_gapsize2(tsNH4) +
#' ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5)) +
#' ggplot2::theme(plot.subtitle = ggplot2::element_text(hjust = 0.5))
#'
#' # Example 9: Visualize total NA counts in tsNH4 - title in center, no subtitle
#' # Plot adjustments via ggplot2 syntax and function parameters
#' ggplot_na_gapsize2(tsNH4, subtitle = NULL) +
#' ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5))
#'
#' # Example 10: Total NA counts in tsNH4 - legend on the bottom and color change
#' # Plot adjustments via ggplot2 syntax and function parameters
#' ggplot_na_gapsize2(tsNH4, colors_bubbles = grDevices::heat.colors(10)) +
#' ggplot2::theme(legend.position = "bottom")
#' @importFrom magrittr %>%
#'
#' @importFrom ggplot2 theme_linedraw ggplot aes geom_point scale_size_identity
#' geom_text scale_x_continuous scale_y_continuous scale_fill_gradientn
#' guide_legend ggtitle xlab ylab theme element_text theme_classic
#'
#' @importFrom grDevices heat.colors
#'
#' @export
ggplot_na_gapsize2 <- function(x,
                               colors_bubbles = c(
                                 "#FCFBFF", "#EFEEFA", "#DDDAEF",
                                 "#C8C3E2", "#B1AAD4", "#9A8FC4",
                                 "#8273B5", "#6B56A7", "#553695",
                                 "#3D1778"
                               ),
                               color_border = "black",
                               alpha_bubbles = 0.4,
                               labels_bubbles = "none",
                               size_bubbles = 25,
                               min_totals = NULL,
                               min_occurrence = NULL,
                               min_gapsize = NULL,
                               max_gapsize = NULL,
                               title = "Gap Size Analysis",
                               subtitle = "Total NA counts for different gapsizes",
                               xlab = "Gapsize",
                               ylab = "Number occurrence",
                               legend = TRUE,
                               legend_breaks = 4,
                               legend_title = "Total NAs",
                               legend_position = "right",
                               legend_point_sizes = "default",
                               theme = ggplot2::theme_linedraw()) {
  # Bubble plot of NA gap sizes: one bubble per distinct gap length, placed at
  # (gap length, number of occurrences) and sized/colored by the total number
  # of NAs that gap length accounts for (gap length * occurrences).
  # Returns the ggplot object; stops on non-univariate, non-numeric, all-NA or
  # NA-free input, on filters that remove every gap, and on an invalid
  # legend_point_sizes value.
  data <- x

  ##
  ## 1. Input Check and Transformation
  ##

  # 1.1 special handling data types
  # (tbl_ts: second column is taken as the value column, tbl: first column)
  if (any(class(data) == "tbl_ts")) {
    data <- as.vector(as.data.frame(data)[, 2])
  } else if (any(class(data) == "tbl")) {
    data <- as.vector(as.data.frame(data)[, 1])
  }

  # 1.2 Check if the input is multivariate
  if (!is.null(dim(data)[2]) && dim(data)[2] > 1) {
    stop("x is not univariate. The function only works with univariate
input for x. For data types with multiple variables/columns only input
the column you want to plot as parameter x.")
  }

  # 1.3 Checks and corrections for wrong data dimension
  # Altering multivariate objects with 1 column (which are essentially
  # univariate) to be dim = NULL
  if (!is.null(dim(data)[2])) {
    data <- data[, 1]
  }

  # 1.4 Input as vector
  data <- as.vector(data)

  # 1.5 Check if input is numeric
  if (!is.numeric(data)) {
    stop("Input x is not numeric")
  }

  # 1.6 Check preconditions about amount of NAs
  # exclude NA only inputs
  missindx <- is.na(data)
  if (all(missindx)) {
    stop("Input data consists only of NAs. At least one non-NA numeric value is needed
for creating a meaningful ggplot_na_gapsize2 plot)")
  }

  # exclude inputs without NAs
  if (!anyNA(data)) {
    stop("Input data contains no NAs. At least one missing value is needed
to create a meaningful ggplot_na_gapsize2 plot)")
  }

  ##
  ## 2. Preparations
  ##

  # 2.1 Create required data
  # Run-length encode the NA mask and keep only the lengths of the NA runs
  rle_na <- base::rle(is.na(data))
  vec <- rle_na$lengths[rle_na$values]
  gap_table <- table(vec)
  gap_names <- as.integer(names(gap_table))
  occurrences <- as.integer(gap_table)
  totals <- occurrences * gap_names

  # 2.2 Create dataframe for ggplot2
  df <- data.frame(gap = gap_names, occurrence = occurrences, total = totals)

  # 2.3 Adjust data to user selected parameters / filter
  # Filters to display only subsets of the data
  if (!is.null(max_gapsize)) {
    df <- df[df$gap <= max_gapsize, , drop = FALSE]
  }
  if (!is.null(min_gapsize)) {
    df <- df[df$gap >= min_gapsize, , drop = FALSE]
  }
  if (!is.null(min_totals)) {
    df <- df[df$total >= min_totals, , drop = FALSE]
  }
  if (!is.null(min_occurrence)) {
    df <- df[df$occurrence >= min_occurrence, , drop = FALSE]
  }

  # Error for too restrictive filters leaving no NA data to display
  if (length(df$gap) < 1) {
    stop("Too restrictive filter options set - nothing to display left.
Your setting of either max_gapsize, min_gapsize, min_totals, min_occurrence or the
combination of them left no NA data to display.)")
  }

  # 2.4 Calculate legend breaks and sizes
  # Create legend break points with pretty function.
  # Only use points within limits - otherwise there will be an error
  leg_breaks <- base::pretty(df$total, n = legend_breaks)
  leg_breaks <- leg_breaks[leg_breaks >= min(df$total) & leg_breaks <= max(df$total)]

  # Prevent empty breaks, when pretty() only chooses values outside limits.
  # The fallback is taken from the filtered data so it is guaranteed to lie
  # inside the range of the plotted totals.
  if (length(leg_breaks) == 0) {
    leg_breaks <- df$total[1]
  }

  # Define size of points in legend
  if (is.numeric(legend_point_sizes)) {
    # Manual definition of legend point size
    if (length(legend_point_sizes) == length(leg_breaks)) {
      leg_sizes <- legend_point_sizes
    } else {
      stop("When you input your own custom values for the size of the points in the legend,
make sure your vector has the same size as are breaks in the legend.")
    }
  } else if (legend_point_sizes == "default") {
    # Scale points in the legend with a symbolic, sensible size
    leg_sizes <- seq(from = 3, by = 2, length.out = length(leg_breaks))
  } else if (legend_point_sizes == "actual") {
    # Scale points in the legend according to their actual size in the plot
    leg_sizes <- leg_breaks / (max(df$total) / size_bubbles)
  } else {
    stop("Wrong values for parameter legend_point_sizes chosen.
To influence the size of points in the legend,
either choose 'default', 'actual' or give a vector with your own desired sizes.
This custom vector needs to have exactly as many elements as the legend has breaks")
  }

  ##
  ## 3. Create the ggplot2 plot
  ##

  # Workaround for 'no visible binding' check() caused by ggplot2 vars
  gap <- df$gap
  occurrence <- df$occurrence
  total <- df$total

  # Bubbles: the largest total is drawn with size 'size_bubbles', all others
  # proportionally smaller (scale_size_identity uses the values as given)
  gg <- ggplot2::ggplot(data = df, ggplot2::aes(x = gap, y = occurrence)) +
    ggplot2::geom_point(
      alpha = alpha_bubbles,
      ggplot2::aes(fill = total, size = total / (max(total) / size_bubbles)),
      color = color_border, pch = 21
    ) +
    ggplot2::scale_size_identity()

  # What to appear in the label, default no label
  if (labels_bubbles == "gap-occurrence") {
    gg <- gg + ggplot2::geom_text(
      ggplot2::aes(label = paste0(gap, "-gap\n", occurrence, "x")),
      size = 2, alpha = 1, color = "black"
    )
  } else if (labels_bubbles == "gap") {
    gg <- gg + ggplot2::geom_text(
      ggplot2::aes(label = paste0(gap, "-gap")),
      size = 2, alpha = 1, color = "black"
    )
  } else if (labels_bubbles == "total") {
    gg <- gg + ggplot2::geom_text(
      ggplot2::aes(label = paste0(total)),
      size = 2, alpha = 1, color = "black"
    )
  } else if (labels_bubbles == "occurrence") {
    gg <- gg + ggplot2::geom_text(
      ggplot2::aes(label = paste0(occurrence, "x")),
      size = 2, alpha = 1, color = "black"
    )
  }

  # Whole-number axis breaks derived from the axis limits (used on both axes)
  integer_breaks <- function(x) unique(floor(base::pretty(seq(0, (max(x) + 1) * 1.1))))

  gg <- gg +
    ggplot2::scale_x_continuous(expand = c(0.1, 0.1), breaks = integer_breaks) +
    ggplot2::scale_y_continuous(expand = c(0.1, 0.1), breaks = integer_breaks) +
    ggplot2::scale_fill_gradientn(
      colors = colors_bubbles,
      breaks = leg_breaks,
      guide = ggplot2::guide_legend(
        title = legend_title,
        override.aes = list(size = leg_sizes)
      )
    ) +
    ggplot2::ggtitle(title, subtitle = subtitle) +
    ggplot2::xlab(xlab) +
    ggplot2::ylab(ylab) +
    theme +
    ggplot2::theme(
      legend.position = legend_position,
      axis.text.x = ggplot2::element_text(angle = 30, hjust = 1)
    )

  # Removing legend
  if (!legend) {
    gg <- gg + ggplot2::theme(legend.position = "none")
  }

  return(gg)
}
================================================
FILE: R/ggplot_na_imputations.R
================================================
#' @title Line Plot to Visualize Imputed Values
#'
#' @description Visualize the imputed values in a time series.
#'
#' @param x_with_na Numeric Vector or Time Series (\code{\link{ts}}) object
#' with NAs before imputation. This parameter and x_with_imputations have to
#' be set. The rest of the parameters are mostly needed for adjusting the plot
#' appearance.
#'
#' @param x_with_imputations Numeric Vector or Time Series (\code{\link{ts}})
#' object with NAs replaced by imputed values. This parameter and
#' x_with_na have to be set. The rest of the parameters are mostly
#' needed for adjusting the plot appearance.
#'
#' @param x_with_truth Numeric Vector or Time Series (\code{\link{ts}}) object
#' with the real values (optional parameter). If the ground truth is known
#' (e.g. in experiments where the missing values were artificially added)
#' it can be displayed in the plot with this parameter.
#' Default is NULL (ground truth not known).
#'
#' @param x_axis_labels For adding specific x-axis labels. Takes a vector of
#' \code{\link[base]{Date}} or \code{\link[base]{POSIXct}} objects as an input
#' (needs the same length as x_with_na).
#' The Default (NULL) uses the observation numbers as x-axis tick labels.
#'
#' @param title Title of the Plot.
#'
#' @param subtitle Subtitle of the Plot.
#'
#' @param xlab Label for x-Axis.
#'
#' @param ylab Label for y-Axis.
#'
#' @param color_points Color for the Symbols/Points of the non-NA Observations.
#'
#' @param color_imputations Color for the Symbols/Points of the Imputed Values.
#'
#' @param color_truth Color for the Symbols/Points of the NA value Ground Truth
#' (only relevant when x_with_truth available).
#'
#' @param shape_points Shape for the Symbols/Points of the non-NA observations.
#' See https://ggplot2.tidyverse.org/articles/ggplot2-specs.html as reference.
#'
#' @param shape_imputations Shape for the Symbols/Points of the imputed values.
#' See https://ggplot2.tidyverse.org/articles/ggplot2-specs.html as reference.
#'
#' @param shape_truth Shape for the Symbols/Points of the NA value Ground Truth
#' (only relevant when x_with_truth available).
#'
#' @param size_points Size for the Symbols/Points of the non-NA Observations.
#'
#' @param size_imputations Size for the Symbols/Points of the Imputed Values.
#'
#' @param size_truth Size for the Symbols/Points of the NA value Ground Truth
#' (only relevant when x_with_truth available).
#'
#' @param color_lines Color for the Lines connecting the Observations/Points.
#'
#' @param width_lines Width for the Lines connecting the Observations/Points.
#'
#' @param linetype Linetype for the Lines connecting the Observations/Points.
#'
#' @param connect_na If TRUE the Imputations are connected
#' to the non-NA observations in the plot. Otherwise there are no
#' connecting lines between symbols in NA areas.
#'
#' @param legend If TRUE a Legend is added at the bottom.
#'
#' @param legend_size Size of the Symbols used in the Legend.
#'
#' @param label_known Legend label for the non-NA Observations.
#'
#' @param label_imputations Legend label for the Imputed Values.
#'
#' @param label_truth Legend label for the Ground Truth of the NA values.
#'
#' @param theme Set a Theme for ggplot2. Default is ggplot2::theme_linedraw().
#' (\code{\link[ggplot2]{theme_linedraw}})
#'
#' @details This plot can be used, to visualize imputed values for a time
#' series. Imputed values (filled NA gaps) are shown in a different color
#' than the other values. If real values (ground truth) for the NA gaps are known,
#' they can be optionally added in a different color.
#'
#' The only really needed parameters for this function are x_with_na
#' (the time series with NAs before imputation) and x_with_imputations
#' (the time series without NAs after imputation). All other parameters
#' are mostly for altering the appearance of the plot.
#'
#' As long as the input is univariate and numeric the function also takes
#' data.frame, tibble, tsibble, zoo, xts as an input.
#'
#' The plot can be adjusted to your needs via the function parameters.
#' Additionally, for more complex adjustments, the output can also be
#' adjusted via ggplot2 syntax. This is possible, since the output
#' of the function is a ggplot2 object. Also take a look at the Examples
#' to see how adjustments are made.
#'
#' @author Steffen Moritz, Sebastian Gatscha
#'
#'
#' @seealso \code{\link[imputeTS]{ggplot_na_distribution}},
#' \code{\link[imputeTS]{ggplot_na_distribution2}},
#' \code{\link[imputeTS]{ggplot_na_gapsize}},
#' \code{\link[imputeTS]{ggplot_na_gapsize2}}
#'
#' @examples
#' # Example 1: Visualize imputation by na_mean
#' imp_mean <- na_mean(tsAirgap)
#' ggplot_na_imputations(tsAirgap, imp_mean)
#'
#'
#' # Example 2: Visualize imputation by na_locf and added ground truth
#' imp_locf <- na_locf(tsAirgap)
#' ggplot_na_imputations(x_with_na = tsAirgap,
#' x_with_imputations = imp_locf,
#' x_with_truth = tsAirgapComplete
#' )
#'
#'
#' # Example 3: Visualize imputation by na_kalman
#' imp_kalman <- na_kalman(tsAirgap)
#' ggplot_na_imputations(x_with_na = tsAirgap, x_with_imputations = imp_kalman)
#'
#'
#' # Example 4: Same as example 1, just written with pipe operator
#' tsAirgap %>%
#' na_mean() %>%
#' ggplot_na_imputations(x_with_na = tsAirgap)
#'
#'
#' # Example 5: Visualize imputation by na_seadec - different color for imputed points
#' # Plot adjustments via ggplot_na_imputations function parameters
#' imp_seadec <- na_seadec(tsAirgap)
#' ggplot_na_imputations(x_with_na = tsAirgap,
#' x_with_imputations = imp_seadec,
#' color_imputations = "gold")
#'
#'
#' # Example 6: Visualize imputation - different theme, point size imputations
#' # Plot adjustments via ggplot_na_imputations function parameters
#' imp_seadec <- na_seadec(tsAirgap)
#' ggplot_na_imputations(x_with_na = tsAirgap,
#' x_with_imputations = imp_seadec,
#' theme = ggplot2::theme_classic(),
#' size_imputations = 5)
#'
#'
#' # Example 7: Visualize imputation - title, subtitle in center
#' # Plot adjustments via ggplot2 syntax
#' imp_seadec <- na_seadec(tsAirgap)
#' ggplot_na_imputations(x_with_na = tsAirgap, x_with_imputations = imp_seadec) +
#' ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5)) +
#' ggplot2::theme(plot.subtitle = ggplot2::element_text(hjust = 0.5))
#'
#'
#' # Example 8: Visualize imputation - title in center, no subtitle
#' # Plot adjustments via ggplot2 syntax and function parameters
#' imp_mean <- na_mean(tsAirgap)
#' ggplot_na_imputations(x_with_na = tsAirgap,
#' x_with_imputations = imp_mean,
#' subtitle = NULL) +
#' ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5))
#'
#' @importFrom magrittr %>%
#'
#' @importFrom ggplot2 theme_linedraw ggplot geom_line aes geom_point
#' scale_color_manual element_blank xlab ylab ggtitle guides guide_legend
#' theme theme_classic
#'
#'
#' @export
ggplot_na_imputations <- function(x_with_na,
                                  x_with_imputations,
                                  x_with_truth = NULL,
                                  x_axis_labels = NULL,
                                  title = "Imputed Values",
                                  subtitle = "Visualization of missing value replacements",
                                  xlab = "Time",
                                  ylab = "Value",
                                  color_points = "steelblue",
                                  color_imputations = "indianred",
                                  color_truth = "seagreen3",
                                  color_lines = "lightslategray",
                                  shape_points = 16,
                                  shape_imputations = 18,
                                  shape_truth = 16,
                                  size_points = 1.5,
                                  size_imputations = 2.5,
                                  size_truth = 1.5,
                                  width_lines = 0.5,
                                  linetype = "solid",
                                  connect_na = TRUE,
                                  legend = TRUE,
                                  legend_size = 5,
                                  label_known = "known values",
                                  label_imputations = "imputed values",
                                  label_truth = "ground truth",
                                  theme = ggplot2::theme_linedraw()) {
  # Line plot of a series in which known values, imputed values and
  # (optionally) the ground truth of the former NA positions are drawn as
  # differently colored points. Returns the ggplot object; stops on
  # non-univariate, non-numeric, length-mismatched, all-NA or NA-free input
  # and on unsupported x_axis_labels.

  # Local helper: tsibble ("tbl_ts") -> second column, tibble ("tbl") -> first
  # column, anything else (including NULL) is returned unchanged.
  tbl_to_vector <- function(series) {
    if (any(class(series) == "tbl_ts")) {
      as.vector(as.data.frame(series)[, 2])
    } else if (any(class(series) == "tbl")) {
      as.vector(as.data.frame(series)[, 1])
    } else {
      series
    }
  }

  # Local helper: drop the dim of one-column objects and strip attributes.
  drop_single_column <- function(series) {
    if (!is.null(dim(series)[2])) {
      series <- series[, 1]
    }
    as.vector(series)
  }

  ##
  ## 1. Input Check and Transformation
  ##

  # 1.1 special handling data types
  x_with_na <- tbl_to_vector(x_with_na)
  x_with_imputations <- tbl_to_vector(x_with_imputations)
  x_with_truth <- tbl_to_vector(x_with_truth)

  # 1.2 Check if the input is multivariate
  if (!is.null(dim(x_with_na)[2]) && dim(x_with_na)[2] > 1) {
    stop("x_with_na is not univariate.
The function only works with univariate input for x_with_na.
For data types with multiple variables/columns only input the
column you want to plot as parameter x_with_na.")
  }
  if (!is.null(dim(x_with_imputations)[2]) && dim(x_with_imputations)[2] > 1) {
    stop("x_with_imputations is not univariate.
The function only works with univariate input for x_with_imputations.
For data types with multiple variables/columns only input the column
you want to plot as parameter x_with_imputations")
  }
  if (!is.null(dim(x_with_truth)[2]) && dim(x_with_truth)[2] > 1) {
    # The message names x_with_truth, the argument that actually failed
    stop("x_with_truth is not univariate.
The function only works with univariate input for x_with_truth.
For data types with multiple variables/columns only input the
column you want to plot as parameter x_with_truth")
  }

  # 1.3 / 1.4 One-column objects become plain vectors (NULL stays NULL)
  x_with_na <- drop_single_column(x_with_na)
  x_with_imputations <- drop_single_column(x_with_imputations)
  x_with_truth <- drop_single_column(x_with_truth)
  has_truth <- !is.null(x_with_truth)

  # 1.5 Check if input is numeric
  if (!is.numeric(x_with_na)) {
    stop("Input x_with_na is not numeric")
  }
  if (!is.numeric(x_with_imputations)) {
    stop("Input x_with_imputations is not numeric")
  }
  if (!is.numeric(x_with_truth) && has_truth) {
    stop("Input x_with_truth is not numeric")
  }

  # 1.6 Same length of the series
  # x_with_na and x_with_imputations need same length
  if (length(x_with_na) != length(x_with_imputations)) {
    stop("Input x_with_na and x_with_imputations need to have the same length.
x_with_na is the time series with NAs before imputation.
x_with_imputations is the time series with filled NAs after applying imputation.")
  }

  # if x_with_truth available it needs also same length
  if (has_truth && (length(x_with_na) != length(x_with_truth))) {
    stop("Input x_with_na, x_with_imputations and x_with_truth need to have the same length.
x_with_na is the time series with NAs before imputation.
x_with_imputations is the time series with filled NAs after applying imputation.
x_with_truth (optional) is the series with the ground truth for the imputed values")
  }

  # 1.7 Check preconditions about amount of NAs
  # Unwanted all NA inputs
  missindx_x_with_na <- is.na(x_with_na)
  if (all(missindx_x_with_na)) {
    stop("Input x_with_na consists only of NAs.
Something with the input likely went wrong.
Creating a ggplot_na_imputations plot does not make sense with an all NA input.
This are the required inputs:
x_with_na (time series before imputation that still has NAs),
x_with_imputations (time series after imputation, where NAs were replaced by imputation")
  }

  missindx_x_with_imputations <- is.na(x_with_imputations)
  if (all(missindx_x_with_imputations)) {
    stop("Input x_with_imputations consists only of NAs.
Something with the input likely went wrong.
Creating a ggplot_na_imputations plot does not make sense with an all NA input.
This are the required inputs:
x_with_na (time series before imputation that still has NAs),
x_with_imputations (time series after imputation, where NAs were replaced by imputation")
  }

  # Unwanted no NA inputs
  if (!anyNA(x_with_na)) {
    stop("Input x_with_na contains no NAs. At least one missing value is needed
to create a meaningful ggplot_na_imputations plot)
This are the required inputs:
x_with_na (time series before imputation that still has NAs),
x_with_imputations (time series after imputation, where NAs were replaced by imputation")
  }

  ##
  ## 2. Preparations
  ##

  # 2.1 Create dataframe for ggplot2
  # Define x-axis label data
  # if Date or POSIXct given for x_axis_labels time information can be plotted
  if (is.null(x_axis_labels)) {
    time <- seq_along(x_with_na)
  } else if (inherits(x_axis_labels, "Date") || inherits(x_axis_labels, "POSIXct")) {
    # Without this check data.frame() would silently recycle a shorter vector
    # whose length divides the series length and misplace every point.
    if (length(x_axis_labels) != length(x_with_na)) {
      stop("Input x_axis_labels needs to have the same length as x_with_na and x_with_imputations.")
    }
    time <- x_axis_labels
  } else {
    stop("Input for x_axis_labels is not in a supported format, must be a
vector of Date or a POSIXct objects with the same length as
x_with_na and x_with_imputations")
  }

  if (has_truth) {
    df <- data.frame(time, x_with_imputations, x_with_na, x_with_truth)
  } else {
    df <- data.frame(time, x_with_imputations, x_with_na)
  }

  ##
  ## 3. Create the ggplot2 plot
  ##

  gg <- ggplot2::ggplot(data = df)

  ## Add Lines
  # The layer is added before the columns are masked below, so it still sees
  # the complete imputed / true series.
  if (connect_na == FALSE) {
    # Don't connect the lines in the missing areas
    gg <- gg + ggplot2::geom_line(
      data = df, ggplot2::aes(x = time, y = x_with_na),
      na.rm = TRUE, color = color_lines,
      linetype = linetype, linewidth = width_lines
    )
  } else if (has_truth) {
    # If truth available connect the true values in the missing areas
    gg <- gg + ggplot2::geom_line(
      data = df, ggplot2::aes(x = time, y = x_with_truth),
      na.rm = TRUE, color = color_lines,
      linetype = linetype, linewidth = width_lines
    )
  } else {
    # If no truth available connect the imputed values in the missing areas
    gg <- gg + ggplot2::geom_line(
      data = df, ggplot2::aes(x = time, y = x_with_imputations),
      na.rm = TRUE, color = color_lines,
      linetype = linetype, linewidth = width_lines
    )
  }

  # Remove known values from imputations - to avoid overplotting
  df$x_with_imputations[!is.na(x_with_na)] <- NA
  if (has_truth) {
    df$x_with_truth[!is.na(x_with_na)] <- NA
  }

  ## Add points
  # The constant color keys "1", "2", "3" are mapped to colors and legend
  # labels by scale_color_manual below.
  # Points for regular, known values
  gg <- gg + ggplot2::geom_point(
    data = df, ggplot2::aes(x = time, y = x_with_na, color = "1"),
    na.rm = TRUE, shape = shape_points, size = size_points
  )

  # Points for Imputations
  gg <- gg + ggplot2::geom_point(
    data = df, ggplot2::aes(x = time, y = x_with_imputations, color = "2"),
    na.rm = TRUE, size = size_imputations, shape = shape_imputations
  )

  # Points for truth
  if (has_truth) {
    gg <- gg + ggplot2::geom_point(
      data = df, ggplot2::aes(x = time, y = x_with_truth, color = "3"),
      na.rm = TRUE, shape = shape_truth, size = size_truth
    )
  }

  # Legend entries: known and imputed values always, ground truth if supplied
  if (has_truth) {
    legend_keys <- c("1", "2", "3")
    legend_labels <- c(label_known, label_imputations, label_truth)
    legend_colors <- c(color_points, color_imputations, color_truth)
    legend_shapes <- c(shape_points, shape_imputations, shape_truth)
  } else {
    legend_keys <- c("1", "2")
    legend_labels <- c(label_known, label_imputations)
    legend_colors <- c(color_points, color_imputations)
    legend_shapes <- c(shape_points, shape_imputations)
  }

  gg <- gg + ggplot2::scale_color_manual(
    name = ggplot2::element_blank(),
    breaks = legend_keys,
    labels = legend_labels,
    values = legend_colors
  )

  gg <- gg + ggplot2::ylab(ylab) + ggplot2::xlab(xlab) +
    ggplot2::ggtitle(label = title, subtitle = subtitle) + theme

  gg <- gg + ggplot2::guides(color = ggplot2::guide_legend(
    override.aes = list(size = legend_size, shape = legend_shapes)
  ))

  gg <- gg + ggplot2::theme(
    legend.position = base::ifelse(legend == TRUE, "bottom", "none"),
    legend.title = ggplot2::element_blank()
  )

  return(gg)
}
================================================
FILE: R/imputeTS-package.R
================================================
#' @keywords internal
"_PACKAGE"
#' @title imputeTS-package description
#'
#' @description
#' The imputeTS package is a collection of algorithms and tools for univariate time series imputation.
#'
#' @details The imputeTS package specializes on (univariate) time series imputation.
#' It offers several different imputation algorithm implementations. Beyond the imputation algorithms
#' the package also provides plotting and printing functions of missing data statistics.
#'
#' The package is easy to use:
#'
#' - To impute (fill all missing values) in a time series \code{x}, run:\cr
#' \code{na_interpolation(x)} \cr
#'
#' - To plot missing data statistics for a time series \code{x}, run:\cr
#' \code{ggplot_na_distribution(x)}\cr
#'
#' - To print missing data statistics for a time series \code{x}, run:\cr
#' \code{statsNA(x)}\cr
#'
#' Every other imputation function (starting with na_'algorithm name') and plotting
#' function (starting with ggplot_na_'plot name') works the same way as in this example.
#'
#' @name imputeTS-package
#'
#' @references Moritz, Steffen, and Thomas Bartz-Beielstein. "imputeTS: Time Series Missing Value Imputation in R." R Journal 9.1 (2017). doi:10.32614/RJ-2017-009.
#'
#' @import stats
#' @importFrom magrittr %>%
#' @importFrom utils globalVariables
#' @importFrom Rcpp sourceCpp
#' @useDynLib imputeTS
NULL
.onUnload <- function(libpath) {
  # Namespace unload hook: unloads the "imputeTS" dynamic library that was
  # loaded from 'libpath'.
  library.dynam.unload(chname = "imputeTS", libpath = libpath)
}
# Register 'rule' as a known global name: na_interpolation() passes it as a
# bare symbol to methods::hasArg(rule), which is presumably what would
# otherwise be reported as an undefined global variable by R CMD check.
utils::globalVariables(c("rule"))
# Re-export the magrittr pipe so it is available once imputeTS is attached.
#' @export
magrittr::`%>%`
================================================
FILE: R/internal_algorithm_interface.R
================================================
##De-Roxygenized to avoid appearance in the package documentation
# @title Algorithm selection (Internal function)
# @description Internal function for choosing between the basic univariate imputation algorithms
# @param x Supposed to be a univariate time series
# @return Time Series (\code{\link{ts}}) object that fulfills the requirements
# @author Steffen Moritz
#' @import stats
apply_base_algorithm <- function(x, algorithm, ...) {
  # Dispatches to the imputation function selected by 'algorithm', forwarding
  # the series and all additional arguments; any other value is an error.
  series <- x

  if (algorithm == "locf") {
    return(na_locf(series, ...))
  }
  if (algorithm == "mean") {
    return(na_mean(series, ...))
  }
  if (algorithm == "random") {
    return(na_random(series, ...))
  }
  if (algorithm == "interpolation") {
    return(na_interpolation(series, ...))
  }
  if (algorithm == "kalman") {
    return(na_kalman(series, ...))
  }
  if (algorithm == "ma") {
    return(na_ma(series, ...))
  }

  # None of the supported algorithm names matched
  stop("Wrong parameter for option algorithm chosen.")
}
================================================
FILE: R/na_interpolation.R
================================================
#' @title Missing Value Imputation by Interpolation
#'
#' @description Uses either linear, spline or stineman interpolation
#' to replace missing values.
#'
#' @param x Numeric Vector (\code{\link{vector}}) or Time Series (\code{\link{ts}})
#' object in which missing values shall be replaced
#'
#' @param option Algorithm to be used. Accepts the following input:
#' \itemize{
#' \item{"linear" - for linear interpolation using \link{approx} } (default choice)
#' \item{"spline" - for spline interpolation using \link{spline}}
#' \item{"stine" - for Stineman interpolation using \link[stinepack]{stinterp}}
#' }
#'
#' @param maxgap Maximum number of successive NAs to still perform imputation on.
#' Default setting is to replace all NAs without restrictions. With this
#' option set, consecutive NAs runs, that are longer than 'maxgap' will
#' be left NA. This option mostly makes sense if you want to
#' treat long runs of NA afterwards separately.
#'
#' @param ... Additional parameters to be passed through to \link{approx} or
#' \link{spline} interpolation functions
#'
#' @return Vector (\code{\link{vector}}) or Time Series (\code{\link{ts}})
#' object (dependent on given input at parameter x)
#'
#' @details Missing values get replaced by values of \link{approx}, \link{spline}
#' or \link[stinepack]{stinterp} interpolation.
#'
#' The na_interpolation function also supports the use of additional parameters from the respective
#' underlying interpolation functions. While usually not really needed, it is useful to know that
#' this advanced use is in principle possible. These additional parameters are not specified explicitly
#' in the na_interpolation function documentation. Take a look into the documentation of the \link[stinepack]{stinterp}, \link{approx} and \link{spline} functions to get an overview about these additional parameters.
#'
#' An example for such a parameter is the 'method' argument of spline, which can be used to
#' further specify the type of spline to be used. Possible values are "fmm", "natural",
#' "periodic", "monoH.FC" and "hyman" (as can be seen in the \link{spline}
#' documentation). The respective function call using this additional parameter would
#' look like this:
#' \code{na_interpolation(x, option ="spline", method ="natural")}
#'
#' Like in this example other additional detail parameters (gained from \link{approx},
#' \link{spline}, \link[stinepack]{stinterp} documentation) can be used by just including
#' them in the na_interpolation function call. As already mentioned, these advanced possibilities
#' for settings parameters are only helpful for specific use cases. For regular use
#' the standard parameters provided directly in the na_interpolation documentation should be
#' more than enough.
#'
#'
#' @author Steffen Moritz, Ron Hause
#'
#' @seealso \code{\link[imputeTS]{na_kalman}}, \code{\link[imputeTS]{na_locf}},
#' \code{\link[imputeTS]{na_ma}}, \code{\link[imputeTS]{na_mean}},
#' \code{\link[imputeTS]{na_random}}, \code{\link[imputeTS]{na_replace}},
#' \code{\link[imputeTS]{na_seadec}}, \code{\link[imputeTS]{na_seasplit}}
#'
#' @examples
#' # Prerequisite: Create Time series with missing values
#' x <- ts(c(2, 3, 4, 5, 6, NA, 7, 8))
#'
#' # Example 1: Perform linear interpolation
#' na_interpolation(x)
#'
#' # Example 2: Perform spline interpolation
#' na_interpolation(x, option = "spline")
#'
#' # Example 3: Perform stine interpolation
#' na_interpolation(x, option = "stine")
#'
#' # Example 4: Perform spline interpolation, with additional parameter pass through from spline()
#' # Take a look at the 'Details' section of the na_interpolation documentation
#' # for more information about advanced parameter pass through options
#' na_interpolation(x, option ="spline", method ="natural")
#'
#' # Example 5: Same as example 1, just written with pipe operator
#' x %>% na_interpolation()
#'
#' # Example 6: Same as example 2, just written with pipe operator
#' x %>% na_interpolation(option = "spline")
#' @references Johannesson, Tomas, et al. (2015). "Package stinepack".
#' @importFrom stats ts approx spline
#' @importFrom methods hasArg
#' @importFrom stinepack stinterp
#' @importFrom magrittr %>%
#' @export
na_interpolation <- function(x, option = "linear", maxgap = Inf, ...) {
  # Replaces NAs by linear, spline or Stineman interpolation over the
  # observation index. Multi-column input is imputed column by column;
  # input without NAs is returned unchanged. Additional arguments in '...'
  # are forwarded to the underlying interpolation function.

  # Variable 'data' is used for all transformations to the time series
  # 'x' needs to stay unchanged to be able to return the same ts class in the end
  data <- x

  #----------------------------------------------------------
  # Multivariate Input
  #----------------------------------------------------------
  if (!is.null(dim(data)[2]) && dim(data)[2] > 1) {
    # Go through columns and impute them by calling this function with univariate input
    for (i in seq_len(dim(data)[2])) {
      if (!anyNA(data[, i])) {
        next
      }
      # if imputing a column does not work - mostly because it is not numeric -
      # the column is left unchanged and a warning is issued instead.
      # '...' is forwarded so pass-through arguments (e.g. rule, method) apply
      # to every column exactly as they do for univariate input.
      tryCatch(
        data[, i] <- na_interpolation(data[, i], option = option, maxgap = maxgap, ...),
        error = function(cond) {
          warning(paste(
            "na_interpolation: No imputation performed for column", i, "of the input dataset.
Reason:", cond[1]
          ), call. = FALSE)
        }
      )
    }
    return(data)
  }

  #----------------------------------------------------------
  # Univariate Input
  # All relevant imputation / pre- postprocessing code is within this part
  #----------------------------------------------------------
  missindx <- is.na(data)

  ##
  ## 1. Input Check and Transformation
  ##

  # 1.1 Check if NAs are present
  if (!anyNA(data)) {
    return(x)
  }

  # 1.2 special handling data types
  if (any(class(data) == "tbl")) {
    data <- as.vector(as.data.frame(data)[, 1])
  }

  # 1.3 Check for algorithm specific minimum amount of non-NA values
  if (sum(!missindx) < 2) {
    stop("At least 2 non-NA data points required in the time series to apply na_interpolation.")
  }

  # 1.4 Checks and corrections for wrong data dimension
  # Check if input dimensionality is not as expected
  if (!is.null(dim(data)[2]) && !dim(data)[2] == 1) {
    stop("Wrong input type for parameter x.")
  }

  # Altering multivariate objects with 1 column (which are essentially
  # univariate) to be dim = NULL
  if (!is.null(dim(data)[2])) {
    data <- data[, 1]
  }

  # 1.5 Check if input is numeric
  if (!is.numeric(data)) {
    stop("Input x is not numeric.")
  }

  # For one-column matrix / data.frame input is.na() yields a logical matrix;
  # flatten it once so it can be used for indexing and for rle() below.
  na_pos <- as.vector(missindx)

  ##
  ## 2. Imputation Code
  ##
  n <- length(data)
  allindx <- seq_len(n)
  indx <- allindx[!na_pos]
  data_vec <- as.vector(data)

  if (option == "linear") {
    # approx() must not receive 'rule' twice: only supply the default
    # rule = 2 (extend the outermost known values to the series ends) when the
    # caller did not pass 'rule' through '...'.
    if ("rule" %in% names(list(...))) {
      interp <- stats::approx(indx, data_vec[indx], allindx, ...)$y
    } else {
      interp <- stats::approx(indx, data_vec[indx], allindx, rule = 2, ...)$y
    }
  } else if (option == "spline") {
    interp <- stats::spline(indx, data_vec[indx], n = n, ...)$y
  } else if (option == "stine") {
    interp <- stinepack::stinterp(indx, data_vec[indx], allindx, ...)$y
    # avoid NAs at the beginning and end of series // same behavior like
    # for approx with rule = 2.
    if (any(is.na(interp))) {
      interp <- na_locf(interp, na_remaining = "rev")
    }
  } else {
    stop("Wrong parameter 'option' given. Value must be either 'linear', 'spline' or 'stine'.")
  }

  # Merge interpolated values back into original time series
  data[na_pos] <- interp[na_pos]

  ##
  ## 3. Post Processing
  ##

  # 3.1 Check for Maxgap option
  # Nothing is done for maxgap = Inf or a negative maxgap
  if (is.finite(maxgap) && maxgap >= 0) {
    # Run-length encoding of the original NA mask: how long the runs of
    # FALSE and TRUE are
    rlencoding <- rle(na_pos)

    # Runs not longer than maxgap (which shall still be imputed) are set FALSE
    rlencoding$values[rlencoding$lengths <= maxgap] <- FALSE

    # Rebuild the full-length mask: TRUE only inside NA runs > maxgap
    en <- inverse.rle(rlencoding)

    # Set all positions in the imputed series with gaps > maxgap back to NA
    data[en] <- NA
  }

  ##
  ## 4. Final Output Formatting
  ##

  # Give back the object originally supplied to the function
  # (necessary for multivariate input with only 1 column)
  if (!is.null(dim(x)[2])) {
    x[, 1] <- data
    return(x)
  }

  return(data)
}
================================================
FILE: R/na_kalman.R
================================================
#' @title Missing Value Imputation by Kalman Smoothing and State Space Models
#'
#' @description Uses Kalman Smoothing on structural time series models
#' (or on the state space representation of an arima model) for imputation.
#'
#' @param x Numeric Vector (\code{\link{vector}}) or Time Series (\code{\link{ts}})
#' object in which missing values shall be replaced
#'
#' @param model Model to be used. With this parameter the State Space Model
#' (on which KalmanSmooth is performed) can be chosen. Accepts the following input:
#'
#' \itemize{
#'
#' \item{"StructTS" - For using a structural model fitted by maximum
#' likelihood (using \link[stats]{StructTS}) } (default choice)
#'
#' \item{"auto.arima" - For using the state space representation of
#' arima model (using \link[forecast]{auto.arima})}
#'
#' }
#'
#' For both auto.arima and StructTS additional parameters for model building can
#' be given with the \dots parameter
#'
#' Additionally it is also possible to use a user created state space model
#' (See code Example 5). This state space model could for example be
#' obtained from another R package for structural time series modeling.
#' Furthermore providing the state space representation of an ARIMA model
#' from \link[stats]{arima} is also possible. But it is important to note,
#' that user created state space models must meet the requirements specified
#' under \link[stats]{KalmanLike}. This means the user supplied state space
#' model has to be in form of a list with at least components T, Z, h , V, a, P, Pn.
#' (more details under \link[stats]{KalmanLike})
#'
#' @param smooth if \code{TRUE} - \code{\link[stats]{KalmanSmooth}} is used for
#' estimation, if \code{FALSE} - \code{\link[stats]{KalmanRun}} is used.
#' Since KalmanRun is often considered extrapolation KalmanSmooth is usually
#' the better choice for imputation.
#'
#' @param nit Parameter from Kalman Filtering (see \link[stats]{KalmanLike}).
#' Usually no need to change from default.
#'
#' @param maxgap Maximum number of successive NAs to still perform imputation on.
#' Default setting is to replace all NAs without restrictions. With this
#' option set, consecutive NAs runs, that are longer than 'maxgap' will
#' be left NA. This option mostly makes sense if you want to
#' treat long runs of NA afterwards separately.
#'
#' @param ... Additional parameters to be passed through to the functions that
#' build the State Space Models (\link[stats]{StructTS} or \link[forecast]{auto.arima}).
#'
#' @return Vector (\code{\link{vector}}) or Time Series (\code{\link{ts}})
#' object (dependent on given input at parameter x)
#'
#' @details The KalmanSmoother used in this function is \code{\link[stats]{KalmanSmooth}}.
#' It operates either on a \code{Basic Structural Model} obtained by
#' \code{\link[stats]{StructTS}} or the state space representation of a ARMA model
#' obtained by \code{\link[forecast]{auto.arima}}.
#'
#' For a detailed explanation of Kalman Filtering and State Space Models the
#' following literature is a good starting point:
#' \itemize{
#' \item{\cite{G. Welch, G. Bishop, An Introduction to the Kalman Filter. SIGGRAPH 2001 Course 8, 1995}}
#' \item{\cite{Harvey, Andrew C. Forecasting, structural time series models and the Kalman filter. Cambridge university press, 1990} }
#' \item{\cite{Grewal, Mohinder S. Kalman filtering. Springer Berlin Heidelberg, 2011}}
#' }
#'
#' @author Steffen Moritz
#' @seealso \code{\link[imputeTS]{na_interpolation}},
#' \code{\link[imputeTS]{na_locf}},
#' \code{\link[imputeTS]{na_ma}}, \code{\link[imputeTS]{na_mean}},
#' \code{\link[imputeTS]{na_random}}, \code{\link[imputeTS]{na_replace}},
#' \code{\link[imputeTS]{na_seadec}}, \code{\link[imputeTS]{na_seasplit}}
#'
#' @examples
#' # Example 1: Perform imputation with KalmanSmooth and StructTS model (default settings)
#' na_kalman(tsAirgap)
#'
#' # Example 2: Perform imputation with KalmanRun and StructTS model
#' na_kalman(tsAirgap, smooth = FALSE)
#'
#' # Example 3: Perform imputation with KalmanSmooth and StructTS model
#' na_kalman(tsAirgap, model = "StructTS", smooth = TRUE)
#'
#' # Example 4: Perform imputation with KalmanSmooth and StructTS model with additional parameters
#' na_kalman(tsAirgap, model = "StructTS", smooth = TRUE, type = "trend")
#'
#' # Example 5: Perform imputation with KalmanSmooth and user created model
#' usermodel <- arima(tsAirgap, order = c(1, 0, 1))$model
#' na_kalman(tsAirgap, model = usermodel)
#'
#' # Example 6: Same as example 1, just written with pipe operator
#' tsAirgap %>% na_kalman()
#' @references Hyndman RJ and Khandakar Y (2008). "Automatic time series forecasting: the forecast package for R". Journal of Statistical Software, 26(3).
#' @importFrom stats StructTS KalmanSmooth KalmanRun arima
#' @importFrom forecast auto.arima
#' @importFrom magrittr %>%
#' @export
na_kalman <- function(x, model = "StructTS", smooth = TRUE, nit = -1, maxgap = Inf, ...) {
  # Imputes the NAs of 'x' with the output of stats::KalmanSmooth (smooth = TRUE)
  # or stats::KalmanRun (smooth = FALSE) run on a state space model. The model is
  # either fitted here ("StructTS" / "auto.arima", '...' is forwarded to the
  # fitting function) or supplied by the caller as a list.
  # NA runs longer than 'maxgap' are set back to NA after imputation.
  #
  # Variable 'data' is used for all transformations to the time series
  # 'x' needs to stay unchanged to be able to return the same class in the end
  data <- x

  #----------------------------------------------------------
  # Multivariate Input
  # Every column that contains NAs is imputed by a univariate call of this
  # function; columns without NAs are skipped.
  #----------------------------------------------------------
  if (!is.null(dim(data)[2]) && dim(data)[2] > 1) {
    for (i in 1:dim(data)[2]) {
      if (!anyNA(data[, i])) {
        next
      }
      # if imputing a column does not work - mostly because it is not numeric -
      # the column is left unchanged and a warning is raised instead of an error
      tryCatch(
        data[, i] <- na_kalman(data[, i], model, smooth, nit, maxgap, ...),
        error = function(cond) {
          warning(paste(
            "na_kalman: No imputation performed for column", i, "of the input dataset.
Reason:", cond[1]
          ), call. = FALSE)
        }
      )
    }
    return(data)
  }

  #----------------------------------------------------------
  # Univariate Input
  # All relevant imputation / pre- postprocessing code is within this part
  #----------------------------------------------------------

  # Positions of the NAs in the original input.
  # as.vector() is needed because is.na() keeps the dim attribute of one-column
  # matrix / data.frame input, while rle() (used for 'maxgap' below) only
  # accepts plain vectors.
  missindx <- as.vector(is.na(data))

  ##
  ## 1. Input Check and Transformation
  ##

  # 1.1 Nothing to impute -> give back the input untouched
  if (!anyNA(data)) {
    return(x)
  }

  # 1.2 special handling data types
  if (any(class(data) == "tbl")) {
    data <- as.vector(as.data.frame(data)[, 1])
  }

  # 1.3 Check for algorithm specific minimum amount of non-NA values
  if (sum(!missindx) < 3) {
    stop("At least 3 non-NA data points required in the time series to apply na_kalman.")
  }

  # 1.4 Checks and corrections for wrong data dimension
  if (!is.null(dim(data)[2]) && !dim(data)[2] == 1) {
    stop("Wrong input type for parameter x.")
  }
  # Objects with exactly 1 column are essentially univariate -> drop the dim
  if (!is.null(dim(data)[2])) {
    data <- data[, 1]
  }

  # 1.5 Check if input is numeric
  if (!is.numeric(data)) {
    stop("Input x is not numeric.")
  }

  # 1.6 Check if type of parameter smooth is correct
  if (!is.logical(smooth)) {
    stop("Parameter smooth must be of type logical ( TRUE / FALSE).")
  }

  # 1.7 Transformation to numeric as 'int' can't be given to KalmanRun
  # (assignment into data[...] keeps the attributes of 'data')
  data[1:length(data)] <- as.numeric(data)

  # 1.8 Check for and mitigate all constant values in combination with StructTS
  # See https://github.com/SteffenMoritz/imputeTS/issues/26
  # Two unique values here means: one constant value plus NA.
  # 'maxgap' is handed on so the fallback honours the caller's setting as well.
  if (is.character(model) && model[1] == "StructTS" && length(unique(as.vector(data))) == 2) {
    return(na_interpolation(x, maxgap = maxgap))
  }

  ##
  ## 2. Imputation Code
  ##

  # 2.1 Selection of state space model
  if (model[1] == "auto.arima") {
    # State space representation of an arima model
    mod <- forecast::auto.arima(data, ...)$model
  } else if (model[1] == "StructTS") {
    # Fallback, in StructTS first value is not allowed to be NA, thus take the
    # first non-NA value. Position 1 is part of 'missindx' in that case, so the
    # placeholder is overwritten by the Kalman estimate further below.
    if (is.na(data[1])) {
      data[1] <- data[which.min(is.na(data))]
    }
    mod <- stats::StructTS(data, ...)$model0
  } else {
    # User supplied model e.g. created with arima() or other state space models
    # from other packages
    mod <- model
    if (length(mod) < 7) {
      stop("Parameter model has either to be \"StructTS\"/\"auto.arima\" or a user supplied model in
form of a list with at least components T, Z, h , V, a, P, Pn specified.")
    }
    if (is.null(mod$Z)) {
      stop("Something is wrong with the user supplied model. Either choose \"auto.arima\" or \"StructTS\"
or supply a state space model with at least components T, Z, h , V, a, P, Pn as specified
under Details on help page for KalmanLike.")
    }
  }

  # 2.2 Selection if KalmanSmooth or KalmanRun
  if (smooth) {
    erg <- stats::KalmanSmooth(data, mod, nit)$smooth
  } else {
    erg <- stats::KalmanRun(data, mod, nit)$states
  }

  # Check if everything is right with the model:
  # one state column is expected per entry of $Z
  if (dim(erg)[2] != length(mod$Z)) {
    stop("Error with number of components $Z.")
  }

  # 2.3 Getting Results
  # The state estimates are combined into one value per observation by
  # multiplying the state rows of the missing positions with $Z
  karima <- erg[missindx, , drop = FALSE] %*% as.matrix(mod$Z)

  # Add imputations to the initial dataset
  data[missindx] <- karima

  ##
  ## 3. Post Processing
  ##

  # 3.1 Maxgap option - only active for finite, non-negative maxgap
  if (is.finite(maxgap) && maxgap >= 0) {
    # Run-length encoding of the NA mask of the original input
    rlencoding <- rle(missindx)
    # Runs of length <= maxgap (which shall still be imputed) are set FALSE,
    # so only NA runs longer than maxgap stay TRUE
    rlencoding$values[rlencoding$lengths <= maxgap] <- FALSE
    en <- inverse.rle(rlencoding)
    # Set all positions belonging to gaps > maxgap back to NA
    data[en] <- NA
  }

  ##
  ## 4. Final Output Formatting
  ##

  # Give back the object originally supplied to the function
  # (necessary for input with dim attribute and only 1 column)
  if (!is.null(dim(x)[2])) {
    x[, 1] <- data
    return(x)
  }

  return(data)
}
================================================
FILE: R/na_locf.R
================================================
#' @title Missing Value Imputation by Last Observation Carried Forward
#'
#' @description Replaces each missing value with the most recent present value
#' prior to it (Last Observation Carried Forward- LOCF). Optionally this can
#' also be done starting from the back of the series (Next Observation Carried
#' Backward - NOCB).
#'
#' @param x Numeric Vector (\code{\link{vector}}) or Time Series (\code{\link{ts}})
#' object in which missing values shall be replaced
#'
#' @param option Algorithm to be used. Accepts the following input:
#' \itemize{
#' \item{"locf" - for Last Observation Carried Forward} (default choice)
#' \item{"nocb" - for Next Observation Carried Backward}
#' }
#'
#' @param na_remaining Method to be used for remaining NAs.
#' \itemize{
#' \item{"rev" - to perform nocb / locf from the reverse direction} (default choice)
#' \item{"keep" - to return the series with NAs}
#' \item{"rm" - to remove remaining NAs}
#' \item{"mean" - to replace remaining NAs by overall mean}
#' }
#'
#' @param maxgap Maximum number of successive NAs to still perform imputation on.
#' Default setting is to replace all NAs without restrictions. With this
#' option set, consecutive NAs runs, that are longer than 'maxgap' will
#' be left NA. This option mostly makes sense if you want to
#' treat long runs of NA afterwards separately.
#'
#' @return Vector (\code{\link{vector}}) or Time Series (\code{\link{ts}})
#' object (dependent on given input at parameter x)
#'
#' @details
#'
#' ## General Functionality
#' Replaces each missing value with the most recent present value
#' prior to it (Last Observation Carried Forward - LOCF). This can also be
#' done in reverse direction, starting from the end of the series (then
#' called Next Observation Carried Backward - NOCB).
#'
#'
#' ## Handling for NAs at the beginning of the series
#' In case one or more successive observations directly at the start of the
#' time series are NA, there exists no 'last value' yet, that can be carried
#' forward. Thus, no LOCF imputation can be performed for these NAs. As soon
#' as the first non-NA value appears, LOCF can be performed as expected. The
#' same applies to NOCB, but from the opposite direction.
#'
#' While this problem might appear seldom and will only affect a very small
#' amount of values at the beginning, it is something to consider.
#' The \code{na_remaining} parameter helps to define, what should happen
#' with these values at the start, that would remain NA after pure LOCF.
#'
#' Default setting is \code{na_remaining = "rev"}, which performs
#' nocb / locf from the other direction to fill these NAs. So a NA
#' at the beginning will be filled with the next non-NA value appearing
#' in the series.
#'
#' With \code{na_remaining = "keep"} NAs at the beginning (that can not
#' be imputed with pure LOCF) are just left as remaining NAs.
#'
#' With \code{na_remaining = "rm"} NAs at the beginning of the series are
#' completely removed. Thus, the time series is basically shortened.
#'
#' Also available is \code{na_remaining = "mean"}, which uses the overall
#' mean of the time series to replace these remaining NAs. (but beware,
#' mean is usually not a good imputation choice - even if it only affects
#' the values at the beginning)
#'
#' @author Steffen Moritz
#'
#' @seealso \code{\link[imputeTS]{na_interpolation}},
#' \code{\link[imputeTS]{na_kalman}},
#' \code{\link[imputeTS]{na_ma}}, \code{\link[imputeTS]{na_mean}},
#' \code{\link[imputeTS]{na_random}}, \code{\link[imputeTS]{na_replace}},
#' \code{\link[imputeTS]{na_seadec}}, \code{\link[imputeTS]{na_seasplit}}
#'
#' @examples
#' # Prerequisite: Create Time series with missing values
#' x <- ts(c(NA, 3, 4, 5, 6, NA, 7, 8))
#'
#' # Example 1: Perform LOCF
#' na_locf(x)
#'
#' # Example 2: Perform NOCB
#' na_locf(x, option = "nocb")
#'
#' # Example 3: Perform LOCF and remove remaining NAs
#' na_locf(x, na_remaining = "rm")
#'
#' # Example 4: Same as example 1, just written with pipe operator
#' x %>% na_locf()
#' @importFrom stats ts
#' @importFrom magrittr %>%
#' @export
na_locf <- function(x, option = "locf", na_remaining = "rev", maxgap = Inf) {
  # Replaces NAs by the last observation carried forward (option = "locf") or
  # the next observation carried backward (option = "nocb"). NAs that cannot
  # be reached from the chosen direction are treated according to
  # 'na_remaining'. NA runs longer than 'maxgap' are set back to NA afterwards.
  #
  # Variable 'data' is used for all transformations to the time series
  # 'x' needs to stay unchanged to be able to return the same class in the end
  data <- x

  #----------------------------------------------------------
  # Multivariate Input
  # Every column that contains NAs is imputed by a univariate call of this
  # function; columns without NAs are skipped.
  #----------------------------------------------------------
  if (!is.null(dim(data)[2]) && dim(data)[2] > 1) {
    for (i in 1:dim(data)[2]) {
      if (!anyNA(data[, i])) {
        next
      }
      # if imputing a column does not work - mostly because it is not numeric -
      # the column is left unchanged and a warning is raised instead of an error
      tryCatch(
        data[, i] <- na_locf(data[, i], option, na_remaining, maxgap),
        error = function(cond) {
          warning(paste(
            "na_locf: No imputation performed for column", i, "of the input dataset.
Reason:", cond[1]
          ), call. = FALSE)
        }
      )
    }
    return(data)
  }

  #----------------------------------------------------------
  # Univariate Input
  # All relevant imputation / pre- postprocessing code is within this part
  #----------------------------------------------------------

  # Positions of the NAs in the original input.
  # as.vector() is needed because is.na() keeps the dim attribute of one-column
  # matrix / data.frame input, while rle() (used for 'maxgap' below) only
  # accepts plain vectors.
  missindx <- as.vector(is.na(data))

  ##
  ## 1. Input Check and Transformation
  ##

  # 1.1 Nothing to impute -> give back the input untouched
  if (!anyNA(data)) {
    return(x)
  }

  # 1.2 special handling data types
  if (any(class(data) == "tbl")) {
    data <- as.vector(as.data.frame(data)[, 1])
  }

  # 1.3 Check for algorithm specific minimum amount of non-NA values
  if (all(missindx)) {
    stop("Input data has only NA values. At least 1 non-NA data point required in the time series to apply na_locf.")
  }

  # 1.4 Checks and corrections for wrong data dimension
  if (!is.null(dim(data)[2]) && !dim(data)[2] == 1) {
    stop("Wrong input type for parameter x.")
  }
  # Objects with exactly 1 column are essentially univariate -> drop the dim
  if (!is.null(dim(data)[2])) {
    data <- data[, 1]
  }

  # 1.5 Check if input is numeric
  if (!is.numeric(data)) {
    stop("Input x is not numeric.")
  }

  ##
  ## 2. Imputation Code
  ##

  # 2.1 Perform locf or nocb on the plain vector
  # (second argument of locf(): FALSE = carry forward, TRUE = carry backward)
  data_vec <- as.vector(data)
  if (option == "locf") {
    imputed <- locf(data_vec, FALSE)
  } else if (option == "nocb") {
    imputed <- locf(data_vec, TRUE)
  } else {
    stop("Wrong parameter 'option' given. Value must be either 'locf' or 'nocb'.")
  }
  data[missindx] <- imputed[missindx]

  # 2.2 Handle remaining NAs - na_remaining param
  # 'kept' stays NULL unless observations get removed ("rm"); in that case it
  # marks which of the original positions are still part of 'data'.
  kept <- NULL
  if (!anyNA(data) || na_remaining == "keep") {
    # no remaining NAs or keep NAs selected -> do nothing
  } else if (na_remaining == "rev") {
    # Replace NAs through locf/nocb from the other direction
    if (option == "locf") {
      data <- na_locf(data, option = "nocb")
    } else if (option == "nocb") {
      data <- na_locf(data, option = "locf")
    }
  } else if (na_remaining == "rm") {
    # Remove all remaining NAs (the series gets shorter).
    # Assumes na_remove() drops exactly the NA positions and keeps the order
    # of the remaining values, as described for na_remaining = "rm".
    kept <- as.vector(!is.na(data))
    data <- na_remove(data)
  } else if (na_remaining == "mean") {
    # Replace NAs with overall mean
    data <- na_mean(data)
  } else {
    stop("Wrong parameter 'na_remaining' given. Value must be either 'keep', 'rm', 'mean' or 'rev'.")
  }

  ##
  ## 3. Post Processing
  ##

  # 3.1 Maxgap option - only active for finite, non-negative maxgap
  if (is.finite(maxgap) && maxgap >= 0) {
    # Run-length encoding of the NA mask of the original input
    rlencoding <- rle(missindx)
    # Runs of length <= maxgap (which shall still be imputed) are set FALSE,
    # so only NA runs longer than maxgap stay TRUE
    rlencoding$values[rlencoding$lengths <= maxgap] <- FALSE
    en <- inverse.rle(rlencoding)
    # After na_remaining = "rm" the series is shorter than the mask, so the
    # mask is reduced to the positions that are still present. Otherwise the
    # wrong observations would be set to NA.
    if (!is.null(kept)) {
      en <- en[kept]
    }
    # Set all positions belonging to gaps > maxgap back to NA
    data[en] <- NA
  }

  ##
  ## 4. Final Output Formatting
  ##

  # Give back the object originally supplied to the function
  # (necessary for input with dim attribute and only 1 column)
  if (!is.null(dim(x)[2])) {
    x[, 1] <- data
    return(x)
  }

  return(data)
}
================================================
FILE: R/na_ma.R
================================================
#' @title Missing Value Imputation by Weighted Moving Average
#'
#' @description Missing value replacement by weighted moving average.
#' Uses semi-adaptive window size to ensure all NAs are replaced.
#'
#' @param x Numeric Vector (\code{\link{vector}}) or Time Series (\code{\link{ts}})
#' object in which missing values shall be replaced
#'
#' @param weighting Weighting to be used. Accepts the following input:
#' \itemize{
#' \item{"simple" - Simple Moving Average (SMA)}
#' \item{"linear" - Linear Weighted Moving Average (LWMA)}
#' \item{"exponential" - Exponential Weighted Moving Average (EWMA)} (default choice)
#' }
#'
#' @param k integer width of the moving average window. Expands to both sides
#' of the center element e.g. k=2 means 4 observations (2 left, 2 right) are
#' taken into account. If all observations in the current window are NA, the
#' window size is automatically increased until there are at least 2 non-NA
#' values present.
#'
#' @param maxgap Maximum number of successive NAs to still perform imputation on.
#' Default setting is to replace all NAs without restrictions. With this
#' option set, consecutive NAs runs, that are longer than 'maxgap' will
#' be left NA. This option mostly makes sense if you want to
#' treat long runs of NA afterwards separately.
#'
#' @return Vector (\code{\link{vector}}) or Time Series (\code{\link{ts}})
#' object (dependent on given input at parameter x)
#'
#' @details In this function missing values get replaced by moving average
#' values. Moving Averages are also sometimes referred to as "moving mean",
#' "rolling mean", "rolling average" or "running average".
#'
#' The mean in this implementation is taken from an equal number of observations
#' on either side of a central value. This means for an NA value at position
#' \code{i} of a time series, the observations i-2, i-1 and i+1, i+2 (assuming
#' a window size of k=2) are used to calculate the mean.
#'
#' Since it can in case of long NA gaps also occur, that all values next to the
#' central value are also NA, the algorithm has a semi-adaptive window size.
#' Whenever there are less than 2 non-NA values in the complete window available,
#' the window size is incrementally increased, till at least 2 non-NA values are
#' there. In all other cases the algorithm sticks to the pre-set window size.
#'
#' There are options for using Simple Moving Average (SMA), Linear Weighted
#' Moving Average (LWMA) and Exponential Weighted Moving Average (EWMA).
#'
#' SMA: all observations in the window are equally weighted for calculating the mean.
#'
#' LWMA: weights decrease in arithmetical progression. The observations
#' directly next to a central value i, have weight 1/2, the observations
#' one further away (i-2,i+2) have weight 1/3, the next (i-3,i+3) have
#' weight 1/4, ...
#'
#' EWMA: uses weighting factors which decrease exponentially. The observations
#' directly next to a central value i, have weight 1/2^1, the observations one
#' further away (i-2,i+2) have weight 1/2^2, the next (i-3,i+3) have weight 1/2^3, ...
#'
#'
#' @author Steffen Moritz
#'
#' @seealso \code{\link[imputeTS]{na_interpolation}},
#' \code{\link[imputeTS]{na_kalman}}, \code{\link[imputeTS]{na_locf}},
#' \code{\link[imputeTS]{na_mean}},
#' \code{\link[imputeTS]{na_random}}, \code{\link[imputeTS]{na_replace}},
#' \code{\link[imputeTS]{na_seadec}}, \code{\link[imputeTS]{na_seasplit}}
#'
#' @examples
#' # Example 1: Perform imputation with simple moving average
#' na_ma(tsAirgap, weighting = "simple")
#'
#' # Example 2: Perform imputation with exponential weighted moving average
#' na_ma(tsAirgap)
#'
#' # Example 3: Perform imputation with exponential weighted moving average, window size 6
#' na_ma(tsAirgap, k = 6)
#'
#' # Example 4: Same as example 1, just written with pipe operator
#' tsAirgap %>% na_ma(weighting = "simple")
#' @importFrom magrittr %>%
#' @export
na_ma <- function(x, k = 4, weighting = "exponential", maxgap = Inf) {
  # Replaces NAs by (weighted) moving average values. The imputation itself is
  # delegated to ma(); this function does input checking, the 'maxgap' post
  # processing and the restoring of the input class.
  #
  # Variable 'data' is used for all transformations to the time series
  # 'x' needs to stay unchanged to be able to return the same class in the end
  data <- x

  #----------------------------------------------------------
  # Multivariate Input
  # Every column that contains NAs is imputed by a univariate call of this
  # function; columns without NAs are skipped.
  #----------------------------------------------------------
  if (!is.null(dim(data)[2]) && dim(data)[2] > 1) {
    for (i in 1:dim(data)[2]) {
      if (!anyNA(data[, i])) {
        next
      }
      # if imputing a column does not work - mostly because it is not numeric -
      # the column is left unchanged and a warning is raised instead of an error
      tryCatch(
        data[, i] <- na_ma(data[, i], k, weighting, maxgap),
        error = function(cond) {
          warning(paste(
            "na_ma: No imputation performed for column", i, "of the input dataset.
Reason:", cond[1]
          ), call. = FALSE)
        }
      )
    }
    return(data)
  }

  #----------------------------------------------------------
  # Univariate Input
  # All relevant imputation / pre- postprocessing code is within this part
  #----------------------------------------------------------

  # Positions of the NAs in the original input.
  # as.vector() is needed because is.na() keeps the dim attribute of one-column
  # matrix / data.frame input, while rle() (used for 'maxgap' below) only
  # accepts plain vectors.
  missindx <- as.vector(is.na(data))

  ##
  ## 1. Input Check and Transformation
  ##

  # 1.1 Nothing to impute -> give back the input untouched
  if (!anyNA(data)) {
    return(x)
  }

  # 1.2 special handling data types
  if (any(class(data) == "tbl")) {
    data <- as.vector(as.data.frame(data)[, 1])
  }

  # 1.3 Check for algorithm specific minimum amount of non-NA values
  if (sum(!missindx) < 2) {
    stop("At least 2 non-NA data points required in the time series to apply na_ma.")
  }

  # 1.4 Checks and corrections for wrong data dimension
  if (!is.null(dim(data)[2]) && !dim(data)[2] == 1) {
    stop("Wrong input type for parameter x.")
  }
  # Objects with exactly 1 column are essentially univariate -> drop the dim
  if (!is.null(dim(data)[2])) {
    data <- data[, 1]
  }

  # 1.5 Check if input is numeric
  if (!is.numeric(data)) {
    stop("Input x is not numeric.")
  }

  # 1.6 Check for wrong values of param k
  if (k < 1) {
    stop("Parameter k has to be larger than 0.")
  }

  ##
  ## 2. Imputation Code
  ##

  # Imputation is performed in C++ code na_ma.cpp
  data <- ma(data, k, weighting)

  ##
  ## 3. Post Processing
  ##

  # 3.1 Maxgap option - only active for finite, non-negative maxgap
  if (is.finite(maxgap) && maxgap >= 0) {
    # Run-length encoding of the NA mask of the original input
    rlencoding <- rle(missindx)
    # Runs of length <= maxgap (which shall still be imputed) are set FALSE,
    # so only NA runs longer than maxgap stay TRUE
    rlencoding$values[rlencoding$lengths <= maxgap] <- FALSE
    en <- inverse.rle(rlencoding)
    # Set all positions belonging to gaps > maxgap back to NA
    data[en] <- NA
  }

  ##
  ## 4. Final Output Formatting
  ##

  # Give back the object originally supplied to the function
  # (necessary for input with dim attribute and only 1 column)
  if (!is.null(dim(x)[2])) {
    x[, 1] <- data
    return(x)
  }

  return(data)
}
================================================
FILE: R/na_mean.R
================================================
#' @title Missing Value Imputation by Mean Value
#'
#' @description Missing value replacement by mean values. Different means
#' like median, mean, mode possible.
#'
#' @param x Numeric Vector (\code{\link{vector}}) or Time Series (\code{\link{ts}})
#' object in which missing values shall be replaced
#'
#' @param option Algorithm to be used. Accepts the following input:
#' \itemize{
#' \item{"mean" - take the mean for imputation (default choice)}
#' \item{"median" - take the median for imputation}
#' \item{"mode" - take the mode for imputation}
#' \item{"harmonic" - take the harmonic mean}
#' \item{"geometric" - take the geometric mean}
#' }
#'
#' @param maxgap Maximum number of successive NAs to still perform imputation on.
#' Default setting is to replace all NAs without restrictions. With this
#' option set, consecutive NAs runs, that are longer than 'maxgap' will
#' be left NA. This option mostly makes sense if you want to
#' treat long runs of NA afterwards separately.
#'
#' @return Vector (\code{\link{vector}}) or Time Series (\code{\link{ts}})
#' object (dependent on given input at parameter x)
#'
#' @details Missing values get replaced by overall mean values. The function
#' calculates the mean, median, mode, harmonic or geometric mean over all the non-NA
#' values and replaces all NAs with this value. Option 'mode' replaces NAs with
#' the most frequent value in the time series. If two or more values occur equally frequent,
#' the function imputes the lower value. Due to their calculation formula geometric and harmonic
#' mean are not well defined for negative values or zero values in the input series.
#'
#' In general using the mean for imputation is mostly a suboptimal choice and should
#' be handled with great caution.
#'
#' @author Steffen Moritz
#'
#' @seealso \code{\link[imputeTS]{na_interpolation}},
#' \code{\link[imputeTS]{na_kalman}}, \code{\link[imputeTS]{na_locf}},
#' \code{\link[imputeTS]{na_ma}},
#' \code{\link[imputeTS]{na_random}}, \code{\link[imputeTS]{na_replace}},
#' \code{\link[imputeTS]{na_seadec}}, \code{\link[imputeTS]{na_seasplit}}
#'
#' @examples
#' # Prerequisite: Create Time series with missing values
#' x <- ts(c(2, 3, 4, 5, 6, NA, 7, 8))
#'
#' # Example 1: Perform imputation with the overall mean
#' na_mean(x)
#'
#' # Example 2: Perform imputation with overall median
#' na_mean(x, option = "median")
#'
#' # Example 3: Same as example 1, just written with pipe operator
#' x %>% na_mean()
#' @importFrom magrittr %>%
#' @importFrom stats median ts
#' @export
#'
na_mean <- function(x, option = "mean", maxgap = Inf) {
# Variable 'data' is used for all transformations to the time series
# 'x' needs to stay unchanged to be able to return the same ts class in the end
data <- x
#----------------------------------------------------------
# Mulivariate Input
# The next 20 lines are just for checking and handling multivariate input.
#----------------------------------------------------------
# Check if the input is multivariate
if (!is.null(dim(data)[2]) && dim(data)[2] > 1) {
# Go through columns and impute them by calling this function with univariate input
for (i in 1:dim(data)[2]) {
if (!anyNA(data[, i])) {
next
}
# if imputing a column does not work - mostly because it is not numeric - the column is left unchanged
tryCatch(
data[, i] <- na_mean(data[, i], option, maxgap),
error = function(cond) {
warning(paste(
"na_mean: No imputation performed for column", i, "of the input dataset.
Reason:", cond[1]
), call. = FALSE)
}
)
}
return(data)
}
#----------------------------------------------------------
# Univariate Input
# All relveant imputation / pre- postprocessing code is within this part
#----------------------------------------------------------
else {
missindx <- is.na(data)
##
## 1. Input Check and Transformation
##
# 1.1 Check if NAs are present
if (!anyNA(data)) {
return(x)
}
# 1.2 special handling data types
if (any(class(data) == "tbl")) {
data <- as.vector(as.data.frame(data)[, 1])
}
# 1.3 Check for algorithm specific minimum amount of non-NA values
if (all(missindx)) {
stop("Input data has only NA values. At least 1 non-NA data point required in the time series to apply na_mean.")
}
# 1.4 Checks and corrections for wrong data dimension
# Check if input dimensionality is not as expected
if (!is.null(dim(data)[2]) && !dim(data)[2] == 1) {
stop("Wrong input type for parameter x.")
}
# Altering multivariate objects with 1 column (which are essentially
# univariate) to be dim = NULL
if (!is.null(dim(data)[2])) {
data <- data[, 1]
}
# 1.5 Check if input is numeric
if (!is.numeric(data)) {
stop("Input x is not numeric.")
}
##
## End Input Check and Transformation
##
##
## 2. Imputation Code
##
if (option == "median") {
# Use Median
median <- stats::median(data, na.rm = TRUE)
data[missindx] <- median
}
else if (option == "mode") {
# Calculate Mode
temp <- table(as.vector(data))
mode <- names(temp)[temp == max(temp)]
mode <- (as.numeric(mode))[1]
data[missindx] <- mode
}
else if (option == "mean") {
# Use arithmetic Mean
mean <- mean(data, na.rm = TRUE)
data[missindx] <- mean
}
else if (option == "geometric") {
# Use geometric Mean
# Check preconditions
if (any(data == 0 | data < 0, na.rm = T)) {
stop(
"The input data contains 0 and/or negative values.\n",
"The geometric and harmonic mean are not well defined for these cases.\n",
"Please another option like e.g. option = 'mean' in this case."
)
}
mean <- exp(mean(log(data), na.rm = TRUE))
data[missindx] <- mean
}
else if (option == "harmonic") {
# Use harmonic Mean
# Check preconditions
if (any(data == 0 | data < 0, na.rm = T)) {
stop(
"The input data contains 0 and/or negative values.\n",
"The geometric and harmonic mean are not well defined for these cases.\n",
"Please another option like e.g. option = 'mean' in this case."
)
}
mean <- 1 / mean(1 / data, na.rm = TRUE)
data[missindx] <- mean
}
else {
stop("Wrong 'option' parameter given, must be either: \n'mean', 'mode', 'median', 'harmonic' or 'geometric'.")
}
##
## End Imputation Code
##
##
## 3. Post Processing
##
# 3.1 Check for Maxgap option
# Do nothing if maxgap = Inf or if maxgap is lower than 0
if (is.finite(maxgap) && maxgap >= 0) {
# Get logical vector of the time series via is.na() and then get the
# run-length encoding of it. The run-length encoding describes how long
# the runs of FALSE and TRUE are
rlencoding <- rle(is.na(x))
# Runs with length <= maxgap (which shall still be imputed) are set FALSE
rlencoding$values[rlencoding$lengths <= maxgap] <- FALSE
# The logical vector is reconstructed with inverse.rle(). Compared to the
# is.na() vector from the beginning, only NA runs longer than maxgap are
# still TRUE; all NA runs with length <= maxgap are now FALSE
en <- inverse.rle(rlencoding)
# Set all positions in the imputed series with gaps > maxgap to NA
# (info from en vector)
data[en == TRUE] <- NA
}
##
## End Post Processing
##
##
## 4. Final Output Formatting
##
# Give back the object originally supplied to the function
# (necessary for multivariate input with only 1 column)
if (!is.null(dim(x)[2])) {
x[, 1] <- data
return(x)
}
##
## End Final Output Formatting
##
return(data)
}
}
================================================
FILE: R/na_random.R
================================================
#' @title Missing Value Imputation by Random Sample
#'
#' @description Replaces each missing value by drawing a random sample
#' between two given bounds.
#'
#' @param x Numeric Vector (\code{\link{vector}}) or Time Series (\code{\link{ts}})
#' object in which missing values shall be replaced
#'
#' @param lower_bound Lower bound for the random samples.
#' If nothing or NULL is set min(x) will be used.
#'
#' @param upper_bound Upper bound for the random samples.
#' If nothing or NULL is set max(x) will be used.
#'
#' @param maxgap Maximum number of successive NAs to still perform imputation on.
#' Default setting is to replace all NAs without restrictions. With this
#' option set, consecutive NA runs that are longer than 'maxgap' will
#' be left NA. This option mostly makes sense if you want to
#' treat long runs of NA afterwards separately.
#'
#' @return Vector (\code{\link{vector}}) or Time Series (\code{\link{ts}})
#' object (dependent on given input at parameter x)
#'
#' @details Replaces each missing value by drawing a random sample between two
#' given bounds. The default bounds are the minimum and the maximum value in
#' the non-NAs from the time series. Function uses \link{runif} function to get
#' the random values.
#'
#' @author Steffen Moritz
#'
#' @seealso \code{\link[imputeTS]{na_interpolation}},
#' \code{\link[imputeTS]{na_kalman}}, \code{\link[imputeTS]{na_locf}},
#' \code{\link[imputeTS]{na_ma}}, \code{\link[imputeTS]{na_mean}},
#' \code{\link[imputeTS]{na_replace}},
#' \code{\link[imputeTS]{na_seadec}}, \code{\link[imputeTS]{na_seasplit}}
#'
#' @examples
#' # Prerequisite: Create Time series with missing values
#' x <- ts(c(2, 3, NA, 5, 6, NA, 7, 8))
#'
#' # Example 1: Replace all NAs by random values that are between min and max of the input time series
#' na_random(x)
#'
#' # Example 2: Replace all NAs by random values between 1 and 10
#' na_random(x, lower_bound = 1, upper_bound = 10)
#'
#' # Example 3: Same as example 1, just written with pipe operator
#' x %>% na_random()
#' @importFrom stats runif ts
#' @importFrom magrittr %>%
#' @export
na_random <- function(x, lower_bound = NULL, upper_bound = NULL, maxgap = Inf) {
# Variable 'data' is used for all transformations to the time series
# 'x' needs to stay unchanged to be able to return the same ts class in the end
data <- x
#----------------------------------------------------------
# Multivariate Input
# The next 20 lines are just for checking and handling multivariate input.
#----------------------------------------------------------
# Check if the input is multivariate
if (!is.null(dim(data)[2]) && dim(data)[2] > 1) {
# Go through columns and impute them by calling this function with univariate input
for (i in 1:dim(data)[2]) {
if (!anyNA(data[, i])) {
next
}
# if imputing a column does not work - mostly because it is not numeric - the column is left unchanged
tryCatch(
data[, i] <- na_random(data[, i], lower_bound, upper_bound, maxgap),
error = function(cond) {
warning(paste(
"na_random: No imputation performed for column", i, "of the input dataset.
Reason:", cond[1]
), call. = FALSE)
}
)
}
return(data)
}
#----------------------------------------------------------
# Univariate Input
# All relevant imputation / pre- and postprocessing code is within this part
#----------------------------------------------------------
else {
missindx <- is.na(data)
##
## 1. Input Check and Transformation
##
# 1.1 Check if NAs are present
if (!anyNA(data)) {
return(x)
}
# 1.2 special handling data types
if (any(class(data) == "tbl")) {
data <- as.vector(as.data.frame(data)[, 1])
}
# 1.3 Check for algorithm specific minimum amount of non-NA values
if (sum(!missindx) < 2 && !(!is.null(upper_bound) && !is.null(lower_bound))) {
stop("At least 2 non-NA data points required in the time series to apply na_random
with the default lower_bound and upper_bound settings.")
}
# 1.4 Checks and corrections for wrong data dimension
# Check if input dimensionality is not as expected
if (!is.null(dim(data)[2]) && !dim(data)[2] == 1) {
stop("Wrong input type for parameter x.")
}
# Altering multivariate objects with 1 column (which are essentially
# univariate) to be dim = NULL
if (!is.null(dim(data)[2])) {
data <- data[, 1]
}
# 1.5 Check if input is numeric
# Combined with check if all NA present, since an all NA vector returns FALSE for is.numeric
if (!is.numeric(data) & !all(is.na(data))) {
stop("Input x is not numeric.")
}
# 1.6 Chec
gitextract_ov74y_ad/
├── .Rbuildignore
├── .github/
│ ├── .gitignore
│ └── workflows/
│ ├── R-CMD-check.yaml
│ ├── pkgdown.yaml
│ ├── pr-commands.yaml
│ └── test-coverage.yaml
├── .gitignore
├── DESCRIPTION
├── LICENSE.txt
├── NAMESPACE
├── NEWS.md
├── R/
│ ├── .Rapp.history
│ ├── RcppExports.R
│ ├── deprecated_defunct.R
│ ├── ggplot_na_distribution.R
│ ├── ggplot_na_distribution2.R
│ ├── ggplot_na_gapsize.R
│ ├── ggplot_na_gapsize2.R
│ ├── ggplot_na_imputations.R
│ ├── imputeTS-package.R
│ ├── internal_algorithm_interface.R
│ ├── na_interpolation.R
│ ├── na_kalman.R
│ ├── na_locf.R
│ ├── na_ma.R
│ ├── na_mean.R
│ ├── na_random.R
│ ├── na_remove.R
│ ├── na_replace.R
│ ├── na_seadec.R
│ ├── na_seasplit.R
│ ├── statsNA.R
│ ├── tsAirgap.R
│ ├── tsAirgapComplete.R
│ ├── tsHeating.R
│ ├── tsHeatingComplete.R
│ ├── tsNH4.R
│ └── tsNH4Complete.R
├── README.md
├── _pkgdown.yaml
├── codecov.yml
├── data/
│ ├── tsAirgap.rda
│ ├── tsAirgapComplete.rda
│ ├── tsHeating.rda
│ ├── tsHeatingComplete.rda
│ ├── tsNH4.rda
│ └── tsNH4Complete.rda
├── docs/
│ ├── 404.html
│ ├── articles/
│ │ ├── gallery_visualizations.html
│ │ ├── gallery_visualizations_files/
│ │ │ ├── accessible-code-block-0.0.1/
│ │ │ │ └── empty-anchor.js
│ │ │ ├── header-attrs-2.16/
│ │ │ │ └── header-attrs.js
│ │ │ └── header-attrs-2.7/
│ │ │ └── header-attrs.js
│ │ └── index.html
│ ├── authors.html
│ ├── bootstrap-toc.css
│ ├── bootstrap-toc.js
│ ├── docsearch.css
│ ├── docsearch.js
│ ├── index.html
│ ├── news/
│ │ └── index.html
│ ├── pkgdown.css
│ ├── pkgdown.js
│ ├── pkgdown.yml
│ ├── reference/
│ │ ├── figures/
│ │ │ └── Cheat_Sheet_imputeTS.pptx
│ │ ├── ggplot_na_distribution.html
│ │ ├── ggplot_na_distribution2.html
│ │ ├── ggplot_na_gapsize.html
│ │ ├── ggplot_na_gapsize2.html
│ │ ├── ggplot_na_imputations.html
│ │ ├── ggplot_na_intervals.html
│ │ ├── ggplot_na_level.html
│ │ ├── ggplot_na_level2.html
│ │ ├── ggplot_na_pattern.html
│ │ ├── imputeTS-package.html
│ │ ├── imputeTS.html
│ │ ├── index.html
│ │ ├── na.interpolation.html
│ │ ├── na.kalman.html
│ │ ├── na.locf.html
│ │ ├── na.ma.html
│ │ ├── na.mean.html
│ │ ├── na.random.html
│ │ ├── na.remove.html
│ │ ├── na.replace.html
│ │ ├── na.seadec.html
│ │ ├── na.seasplit.html
│ │ ├── na_interpolation.html
│ │ ├── na_kalman.html
│ │ ├── na_locf.html
│ │ ├── na_ma.html
│ │ ├── na_mean.html
│ │ ├── na_random.html
│ │ ├── na_remove.html
│ │ ├── na_replace.html
│ │ ├── na_seadec.html
│ │ ├── na_seasplit.html
│ │ ├── plotNA.distribution.html
│ │ ├── plotNA.distributionBar.html
│ │ ├── plotNA.gapsize.html
│ │ ├── plotNA.imputations.html
│ │ ├── reexports.html
│ │ ├── statsNA.html
│ │ ├── tsAirgap.html
│ │ ├── tsAirgapComplete.html
│ │ ├── tsHeating.html
│ │ ├── tsHeatingComplete.html
│ │ ├── tsNH4.html
│ │ └── tsNH4Complete.html
│ └── sitemap.xml
├── imputeTS.Rproj
├── inst/
│ └── CITATION
├── man/
│ ├── ggplot_na_distribution.Rd
│ ├── ggplot_na_distribution2.Rd
│ ├── ggplot_na_gapsize.Rd
│ ├── ggplot_na_gapsize2.Rd
│ ├── ggplot_na_imputations.Rd
│ ├── ggplot_na_intervals.Rd
│ ├── imputeTS-package.Rd
│ ├── na.interpolation.Rd
│ ├── na.kalman.Rd
│ ├── na.locf.Rd
│ ├── na.ma.Rd
│ ├── na.mean.Rd
│ ├── na.random.Rd
│ ├── na.remove.Rd
│ ├── na.replace.Rd
│ ├── na.seadec.Rd
│ ├── na.seasplit.Rd
│ ├── na_interpolation.Rd
│ ├── na_kalman.Rd
│ ├── na_locf.Rd
│ ├── na_ma.Rd
│ ├── na_mean.Rd
│ ├── na_random.Rd
│ ├── na_remove.Rd
│ ├── na_replace.Rd
│ ├── na_seadec.Rd
│ ├── na_seasplit.Rd
│ ├── plotNA.distribution.Rd
│ ├── plotNA.distributionBar.Rd
│ ├── plotNA.gapsize.Rd
│ ├── plotNA.imputations.Rd
│ ├── reexports.Rd
│ ├── statsNA.Rd
│ ├── tsAirgap.Rd
│ ├── tsAirgapComplete.Rd
│ ├── tsHeating.Rd
│ ├── tsHeatingComplete.Rd
│ ├── tsNH4.Rd
│ └── tsNH4Complete.Rd
├── src/
│ ├── RcppExports.cpp
│ ├── locf.cpp
│ └── ma.cpp
├── tests/
│ ├── testthat/
│ │ ├── test-apply_base_algorithm.R
│ │ ├── test-depreciated_defunct.R
│ │ ├── test-error_handling.R
│ │ ├── test-ggplot_na_distribution.R
│ │ ├── test-ggplot_na_distribution2.R
│ │ ├── test-ggplot_na_gapsize.R
│ │ ├── test-ggplot_na_gapsize2.R
│ │ ├── test-ggplot_na_imputations.R
│ │ ├── test-input-na_advanced-tsObjects.R
│ │ ├── test-na_interpolation.R
│ │ ├── test-na_kalman.R
│ │ ├── test-na_locf.R
│ │ ├── test-na_ma.R
│ │ ├── test-na_mean.R
│ │ ├── test-na_random.R
│ │ ├── test-na_remove.R
│ │ ├── test-na_replace.R
│ │ ├── test-na_seadec.R
│ │ ├── test-na_seasplit.R
│ │ ├── test-parameter-maxgap.R
│ │ └── test-statsNA.R
│ └── testthat.R
└── vignettes/
├── Cheat_Sheet_imputeTS.pdf.asis
├── Cheat_Sheet_imputeTS.pptx
├── RJournal.sty
├── gallery_visualizations.Rmd
└── imputeTS-Time-Series-Missing-Value-Imputation-in-R.ltx
SYMBOL INDEX (12 symbols across 5 files)
FILE: docs/docsearch.js
function matchedWords (line 54) | function matchedWords(hit) {
function updateHitURL (line 73) | function updateHitURL(hit) {
FILE: docs/pkgdown.js
function paths (line 42) | function paths(pathname) {
function prefix_length (line 53) | function prefix_length(needle, haystack) {
function changeTooltipMessage (line 72) | function changeTooltipMessage(element, msg) {
FILE: src/RcppExports.cpp
function RcppExport (line 15) | RcppExport SEXP _imputeTS_locf(SEXP xSEXP, SEXP reverseSEXP) {
function RcppExport (line 27) | RcppExport SEXP _imputeTS_ma(SEXP xSEXP, SEXP kSEXP, SEXP weightingSEXP) {
function RcppExport (line 45) | RcppExport void R_init_imputeTS(DllInfo *dll) {
FILE: src/locf.cpp
function locf (line 6) | Rcpp::NumericVector locf(NumericVector x, bool reverse)
FILE: src/ma.cpp
type pow_wrapper (line 5) | struct pow_wrapper {
function NumericVector (line 11) | NumericVector vecpow(const IntegerVector base, const NumericVector exp) {
function ma (line 20) | Rcpp::NumericVector ma(NumericVector x, int k, String weighting) {
Condensed preview — 180 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (1,293K chars).
[
{
"path": ".Rbuildignore",
"chars": 227,
"preview": "imputeTS-header.png\nimputeTS-header.jpg\nimputeTS-logo1800x2100.png\n^.*\\.Rproj$\n^\\.Rproj\\.user$\n^.*\\.yml$\n^.*\\.yaml$\n^.*\\"
},
{
"path": ".github/.gitignore",
"chars": 7,
"preview": "*.html\n"
},
{
"path": ".github/workflows/R-CMD-check.yaml",
"chars": 3241,
"preview": "# Workflow derived from https://github.com/r-lib/actions/tree/master/examples\n# Need help debugging build failures? Star"
},
{
"path": ".github/workflows/pkgdown.yaml",
"chars": 1005,
"preview": "# Workflow derived from https://github.com/r-lib/actions/tree/master/examples\n# Need help debugging build failures? Star"
},
{
"path": ".github/workflows/pr-commands.yaml",
"chars": 2320,
"preview": "# Workflow derived from https://github.com/r-lib/actions/tree/master/examples\n# Need help debugging build failures? Star"
},
{
"path": ".github/workflows/test-coverage.yaml",
"chars": 705,
"preview": "# Workflow derived from https://github.com/r-lib/actions/tree/master/examples\n# Need help debugging build failures? Star"
},
{
"path": ".gitignore",
"chars": 102,
"preview": ".Rproj.user\n/.Rhistory\n/.dropbox\n/desktop.ini\n/.RData\nIcon?\nIcon\n*.o\n*.dll\n*.so\ndoc\nMeta\n/doc/\n/Meta/\n"
},
{
"path": "DESCRIPTION",
"chars": 2006,
"preview": "Package: imputeTS\nVersion: 3.4\nDate: 2025-08-25\nTitle: Time Series Missing Value Imputation\nDescription: Imputation (rep"
},
{
"path": "LICENSE.txt",
"chars": 35141,
"preview": " GNU GENERAL PUBLIC LICENSE\n Version 3, 29 June 2007\n\n Copyright (C) 2007 Free "
},
{
"path": "NAMESPACE",
"chars": 2278,
"preview": "# Generated by roxygen2: do not edit by hand\n\nexport(\"%>%\")\nexport(ggplot_na_distribution)\nexport(ggplot_na_distribution"
},
{
"path": "NEWS.md",
"chars": 13563,
"preview": "\n# Changes in Version 3.4\n\n* Added ggplot_na_gapsize2 plot (and unit tests). \n Nice way to illustrate how different NA "
},
{
"path": "R/.Rapp.history",
"chars": 0,
"preview": ""
},
{
"path": "R/RcppExports.R",
"chars": 325,
"preview": "# Generated by using Rcpp::compileAttributes() -> do not edit by hand\n# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD"
},
{
"path": "R/deprecated_defunct.R",
"chars": 16207,
"preview": "#--------------------------------------------------------------#\n# Collection of DEPRECATED AND DEFUNCT FUNCTIONS\n#-----"
},
{
"path": "R/ggplot_na_distribution.R",
"chars": 9154,
"preview": "#' @title Line Plot to Visualize the Distribution of Missing Values\n#'\n#' @description Visualize the distribution of mis"
},
{
"path": "R/ggplot_na_distribution2.R",
"chars": 11112,
"preview": "#' @title Stacked Bar Plot to Visualize Missing Values per Time Interval\n#'\n#' @description Visualization of missing val"
},
{
"path": "R/ggplot_na_gapsize.R",
"chars": 11807,
"preview": "#' @title Bar Plot to Visualize Occurrences of Different NA Gap Sizes\n#'\n#' @description Visualize the Number of Occurre"
},
{
"path": "R/ggplot_na_gapsize2.R",
"chars": 16153,
"preview": "#' @title Bubble Plot to Visualize Total NA Count of NA gap sizes\n#'\n#' @description Visualize the total NA count (gap s"
},
{
"path": "R/ggplot_na_imputations.R",
"chars": 17746,
"preview": "#' @title Line Plot to Visualize Imputed Values\n#'\n#' @description Visualize the imputed values in a time series.\n#'\n#' "
},
{
"path": "R/imputeTS-package.R",
"chars": 1521,
"preview": "#' @keywords internal\n\"_PACKAGE\"\n\n#' @title imputeTS-package description\n#' \n#' @description \n#' The imputeTS package is"
},
{
"path": "R/internal_algorithm_interface.R",
"chars": 1009,
"preview": "##De-Roxygenized to avoid appearance in the package documentation\n\n# @title Algorithm selection (Internal function)\n# @d"
},
{
"path": "R/na_interpolation.R",
"chars": 9464,
"preview": "#' @title Missing Value Imputation by Interpolation\n#'\n#' @description Uses either linear, spline or stineman interpolat"
},
{
"path": "R/na_kalman.R",
"chars": 11350,
"preview": "#' @title Missing Value Imputation by Kalman Smoothing and State Space Models\n#'\n#' @description Uses Kalman Smoothing o"
},
{
"path": "R/na_locf.R",
"chars": 9339,
"preview": "#' @title Missing Value Imputation by Last Observation Carried Forward\n#'\n#' @description Replaces each missing value wi"
},
{
"path": "R/na_ma.R",
"chars": 7990,
"preview": "#' @title Missing Value Imputation by Weighted Moving Average\n#'\n#' @description Missing value replacement by weighted m"
},
{
"path": "R/na_mean.R",
"chars": 8036,
"preview": "#' @title Missing Value Imputation by Mean Value\n#'\n#' @description Missing value replacement by mean values. Different "
},
{
"path": "R/na_random.R",
"chars": 7471,
"preview": "#' @title Missing Value Imputation by Random Sample\n#'\n#' @description Replaces each missing value by drawing a random s"
},
{
"path": "R/na_remove.R",
"chars": 3819,
"preview": "#' @title Remove Missing Values\n#'\n#' @description Removes all missing values from a time series.\n#'\n#' @param x Numeric"
},
{
"path": "R/na_replace.R",
"chars": 5490,
"preview": "#' @title Replace Missing Values by a Defined Value\n#'\n#' @description Replaces all missing values with a given value.\n#"
},
{
"path": "R/na_seadec.R",
"chars": 10820,
"preview": "#' @title Seasonally Decomposed Missing Value Imputation\n#'\n#' @description Removes the seasonal component from the time"
},
{
"path": "R/na_seasplit.R",
"chars": 8365,
"preview": "#' @title Seasonally Splitted Missing Value Imputation\n#'\n#' @description Splits the times series into seasons and after"
},
{
"path": "R/statsNA.R",
"chars": 7726,
"preview": "\n#' @title Print Statistics about Missing Values\n#'\n#' @description Print summary stats about the distribution of\n#' mis"
},
{
"path": "R/tsAirgap.R",
"chars": 1521,
"preview": "#' @title Time series of monthly airline passengers (with NAs)\n#'\n#' @description Monthly totals of international airlin"
},
{
"path": "R/tsAirgapComplete.R",
"chars": 1525,
"preview": "#' @title Time series of monthly airline passengers (complete)\n#'\n#' @description Monthly totals of international airlin"
},
{
"path": "R/tsHeating.R",
"chars": 1900,
"preview": "#' @title Time series of a heating systems supply temperature (with NAs)\n#'\n#' @description Time series of a heating sys"
},
{
"path": "R/tsHeatingComplete.R",
"chars": 1908,
"preview": "#' @title Time series of a heating systems supply temperature (complete)\n#'\n#' @description Time series of a heating sys"
},
{
"path": "R/tsNH4.R",
"chars": 1706,
"preview": "#' @title Time series of NH4 concentration in a wastewater system (with NAs)\n#' \n#' @description Time series of NH4 conc"
},
{
"path": "R/tsNH4Complete.R",
"chars": 1716,
"preview": "#' @title Time series of NH4 concentration in a wastewater system (complete)\n#' \n#' @description Time series of NH4 conc"
},
{
"path": "README.md",
"chars": 8354,
"preview": " <!-- badges: start -->\n[![Project Status: Active The project has reached a stable, usable state and is being actively "
},
{
"path": "_pkgdown.yaml",
"chars": 175,
"preview": "title: imputeTS\nurl: https://SteffenMoritz.github.io/imputeTS\ntemplate:\n params:\n bootswatch: flatly\n\n\nauthors:\nStef"
},
{
"path": "codecov.yml",
"chars": 232,
"preview": "comment: false\n\ncoverage:\n status:\n project:\n default:\n target: auto\n threshold: 1%\n infor"
},
{
"path": "docs/404.html",
"chars": 6158,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type"
},
{
"path": "docs/articles/gallery_visualizations.html",
"chars": 19302,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type"
},
{
"path": "docs/articles/gallery_visualizations_files/accessible-code-block-0.0.1/empty-anchor.js",
"chars": 653,
"preview": "// Hide empty <a> tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/is"
},
{
"path": "docs/articles/gallery_visualizations_files/header-attrs-2.16/header-attrs.js",
"chars": 507,
"preview": "// Pandoc 2.9 adds attributes on both header and div. We remove the former (to\n// be compatible with the behavior of Pan"
},
{
"path": "docs/articles/gallery_visualizations_files/header-attrs-2.7/header-attrs.js",
"chars": 507,
"preview": "// Pandoc 2.9 adds attributes on both header and div. We remove the former (to\n// be compatible with the behavior of Pan"
},
{
"path": "docs/articles/index.html",
"chars": 5570,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/authors.html",
"chars": 7223,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/bootstrap-toc.css",
"chars": 1843,
"preview": "/*!\n * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/)\n * Copyright 2015 Aidan Feldman\n * Lic"
},
{
"path": "docs/bootstrap-toc.js",
"chars": 4764,
"preview": "/*!\n * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/)\n * Copyright 2015 Aidan Feldman\n * Lic"
},
{
"path": "docs/docsearch.css",
"chars": 11758,
"preview": "/* Docsearch -------------------------------------------------------------- */\n/*\n Source: https://github.com/algolia/d"
},
{
"path": "docs/docsearch.js",
"chars": 2018,
"preview": "$(function() {\n\n // register a handler to move the focus to the search bar\n // upon pressing shift + \"/\" (i.e. \"?\")\n "
},
{
"path": "docs/index.html",
"chars": 19809,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type"
},
{
"path": "docs/news/index.html",
"chars": 25503,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/pkgdown.css",
"chars": 7308,
"preview": "/* Sticky footer */\n\n/**\n * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/\n * Details"
},
{
"path": "docs/pkgdown.js",
"chars": 3248,
"preview": "/* http://gregfranko.com/blog/jquery-best-practices/ */\n(function($) {\n $(function() {\n\n $('.navbar-fixed-top').head"
},
{
"path": "docs/pkgdown.yml",
"chars": 269,
"preview": "pandoc: '3.4'\npkgdown: 2.1.3\npkgdown_sha: ~\narticles:\n gallery_visualizations: gallery_visualizations.html\nlast_built: "
},
{
"path": "docs/reference/ggplot_na_distribution.html",
"chars": 22223,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/ggplot_na_distribution2.html",
"chars": 22560,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/ggplot_na_gapsize.html",
"chars": 22694,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/ggplot_na_gapsize2.html",
"chars": 25932,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/ggplot_na_imputations.html",
"chars": 27042,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/ggplot_na_intervals.html",
"chars": 7159,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/ggplot_na_level.html",
"chars": 23263,
"preview": "<!-- Generated by pkgdown: do not edit by hand -->\n<!DOCTYPE html>\n<html lang=\"en\">\n <head>\n <meta charset=\"utf-8\">\n<m"
},
{
"path": "docs/reference/ggplot_na_level2.html",
"chars": 23427,
"preview": "<!-- Generated by pkgdown: do not edit by hand -->\n<!DOCTYPE html>\n<html lang=\"en\">\n <head>\n <meta charset=\"utf-8\">\n<m"
},
{
"path": "docs/reference/ggplot_na_pattern.html",
"chars": 20135,
"preview": "<!-- Generated by pkgdown: do not edit by hand -->\n<!DOCTYPE html>\n<html lang=\"en\">\n <head>\n <meta charset=\"utf-8\">\n<m"
},
{
"path": "docs/reference/imputeTS-package.html",
"chars": 9637,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/imputeTS.html",
"chars": 308,
"preview": "<html>\n <head>\n <meta http-equiv=\"refresh\" content=\"0;URL=https://SteffenMoritz.github.io/imputeTS/reference/imputeT"
},
{
"path": "docs/reference/index.html",
"chars": 9967,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/na.interpolation.html",
"chars": 8656,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/na.kalman.html",
"chars": 10503,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/na.locf.html",
"chars": 7964,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/na.ma.html",
"chars": 8411,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/na.mean.html",
"chars": 8041,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/na.random.html",
"chars": 8096,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/na.remove.html",
"chars": 7062,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/na.replace.html",
"chars": 7759,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/na.seadec.html",
"chars": 9022,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/na.seasplit.html",
"chars": 9041,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/na_interpolation.html",
"chars": 17699,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/na_kalman.html",
"chars": 34489,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/na_locf.html",
"chars": 15027,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/na_ma.html",
"chars": 25131,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/na_mean.html",
"chars": 12484,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/na_random.html",
"chars": 12486,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/na_remove.html",
"chars": 11113,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/na_replace.html",
"chars": 11528,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/na_seadec.html",
"chars": 23013,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/na_seasplit.html",
"chars": 18099,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/plotNA.distribution.html",
"chars": 7148,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/plotNA.distributionBar.html",
"chars": 7171,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/plotNA.gapsize.html",
"chars": 7081,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/plotNA.imputations.html",
"chars": 7133,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/reexports.html",
"chars": 6245,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/statsNA.html",
"chars": 24080,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/tsAirgap.html",
"chars": 8073,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/tsAirgapComplete.html",
"chars": 8122,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/tsHeating.html",
"chars": 8597,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/tsHeatingComplete.html",
"chars": 8641,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/tsNH4.html",
"chars": 8323,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/reference/tsNH4Complete.html",
"chars": 8371,
"preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
},
{
"path": "docs/sitemap.xml",
"chars": 4485,
"preview": "<urlset xmlns = 'http://www.sitemaps.org/schemas/sitemap/0.9'>\n<url><loc>https://SteffenMoritz.github.io/imputeTS/404.ht"
},
{
"path": "imputeTS.Rproj",
"chars": 343,
"preview": "Version: 1.0\n\nRestoreWorkspace: Default\nSaveWorkspace: Default\nAlwaysSaveHistory: Default\n\nEnableCodeIndexing: Yes\nUseSp"
},
{
"path": "inst/CITATION",
"chars": 469,
"preview": "year <- sub(\"-.*\", \"\", meta$Date)\nvers <- paste(\"R package version\", meta$Version)\n\ncitHeader(\"To cite the imputeTS pack"
},
{
"path": "man/ggplot_na_distribution.Rd",
"chars": 4985,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/ggplot_na_distribution.R\n\\name{ggplot_na_d"
},
{
"path": "man/ggplot_na_distribution2.Rd",
"chars": 6138,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/ggplot_na_distribution2.R\n\\name{ggplot_na_"
},
{
"path": "man/ggplot_na_gapsize.Rd",
"chars": 6445,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/ggplot_na_gapsize.R\n\\name{ggplot_na_gapsiz"
},
{
"path": "man/ggplot_na_gapsize2.Rd",
"chars": 8212,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/ggplot_na_gapsize2.R\n\\name{ggplot_na_gapsi"
},
{
"path": "man/ggplot_na_imputations.Rd",
"chars": 7430,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/ggplot_na_imputations.R\n\\name{ggplot_na_im"
},
{
"path": "man/ggplot_na_intervals.Rd",
"chars": 701,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/deprecated_defunct.R\n\\name{ggplot_na_inter"
},
{
"path": "man/imputeTS-package.Rd",
"chars": 2551,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/imputeTS-package.R\n\\docType{package}\n\\name"
},
{
"path": "man/na.interpolation.Rd",
"chars": 1413,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/deprecated_defunct.R\n\\name{na.interpolatio"
},
{
"path": "man/na.kalman.Rd",
"chars": 2673,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/deprecated_defunct.R\n\\name{na.kalman}\n\\ali"
},
{
"path": "man/na.locf.Rd",
"chars": 1168,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/deprecated_defunct.R\n\\name{na.locf}\n\\alias"
},
{
"path": "man/na.ma.Rd",
"chars": 1529,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/deprecated_defunct.R\n\\name{na.ma}\n\\alias{n"
},
{
"path": "man/na.mean.Rd",
"chars": 1266,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/deprecated_defunct.R\n\\name{na.mean}\n\\alias"
},
{
"path": "man/na.random.Rd",
"chars": 1186,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/deprecated_defunct.R\n\\name{na.random}\n\\ali"
},
{
"path": "man/na.remove.Rd",
"chars": 601,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/deprecated_defunct.R\n\\name{na.remove}\n\\ali"
},
{
"path": "man/na.replace.Rd",
"chars": 1009,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/deprecated_defunct.R\n\\name{na.replace}\n\\al"
},
{
"path": "man/na.seadec.Rd",
"chars": 1821,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/deprecated_defunct.R\n\\name{na.seadec}\n\\ali"
},
{
"path": "man/na.seasplit.Rd",
"chars": 1826,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/deprecated_defunct.R\n\\name{na.seasplit}\n\\a"
},
{
"path": "man/na_interpolation.Rd",
"chars": 4091,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/na_interpolation.R\n\\name{na_interpolation}"
},
{
"path": "man/na_kalman.Rd",
"chars": 4741,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/na_kalman.R\n\\name{na_kalman}\n\\alias{na_kal"
},
{
"path": "man/na_locf.Rd",
"chars": 4003,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/na_locf.R\n\\name{na_locf}\n\\alias{na_locf}\n\\"
},
{
"path": "man/na_ma.Rd",
"chars": 3844,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/na_ma.R\n\\name{na_ma}\n\\alias{na_ma}\n\\title{"
},
{
"path": "man/na_mean.Rd",
"chars": 2524,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/na_mean.R\n\\name{na_mean}\n\\alias{na_mean}\n\\"
},
{
"path": "man/na_random.Rd",
"chars": 2132,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/na_random.R\n\\name{na_random}\n\\alias{na_ran"
},
{
"path": "man/na_remove.Rd",
"chars": 1508,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/na_remove.R\n\\name{na_remove}\n\\alias{na_rem"
},
{
"path": "man/na_replace.Rd",
"chars": 1581,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/na_replace.R\n\\name{na_replace}\n\\alias{na_r"
},
{
"path": "man/na_seadec.Rd",
"chars": 4592,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/na_seadec.R\n\\name{na_seadec}\n\\alias{na_sea"
},
{
"path": "man/na_seasplit.Rd",
"chars": 2597,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/na_seasplit.R\n\\name{na_seasplit}\n\\alias{na"
},
{
"path": "man/plotNA.distribution.Rd",
"chars": 697,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/deprecated_defunct.R\n\\name{plotNA.distribu"
},
{
"path": "man/plotNA.distributionBar.Rd",
"chars": 710,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/deprecated_defunct.R\n\\name{plotNA.distribu"
},
{
"path": "man/plotNA.gapsize.Rd",
"chars": 666,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/deprecated_defunct.R\n\\name{plotNA.gapsize}"
},
{
"path": "man/plotNA.imputations.Rd",
"chars": 690,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/deprecated_defunct.R\n\\name{plotNA.imputati"
},
{
"path": "man/reexports.Rd",
"chars": 410,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/imputeTS-package.R\n\\docType{import}\n\\name{"
},
{
"path": "man/statsNA.Rd",
"chars": 2988,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/statsNA.R\n\\name{statsNA}\n\\alias{statsNA}\n\\"
},
{
"path": "man/tsAirgap.Rd",
"chars": 1537,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/tsAirgap.R\n\\docType{data}\n\\name{tsAirgap}\n"
},
{
"path": "man/tsAirgapComplete.Rd",
"chars": 1565,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/tsAirgapComplete.R\n\\docType{data}\n\\name{ts"
},
{
"path": "man/tsHeating.Rd",
"chars": 1903,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/tsHeating.R\n\\docType{data}\n\\name{tsHeating"
},
{
"path": "man/tsHeatingComplete.Rd",
"chars": 1927,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/tsHeatingComplete.R\n\\docType{data}\n\\name{t"
},
{
"path": "man/tsNH4.Rd",
"chars": 1709,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/tsNH4.R\n\\docType{data}\n\\name{tsNH4}\n\\alias"
},
{
"path": "man/tsNH4Complete.Rd",
"chars": 1737,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/tsNH4Complete.R\n\\docType{data}\n\\name{tsNH4"
},
{
"path": "src/RcppExports.cpp",
"chars": 1619,
"preview": "// Generated by using Rcpp::compileAttributes() -> do not edit by hand\n// Generator token: 10BE3573-1514-4C36-9D1C-5A225"
},
{
"path": "src/locf.cpp",
"chars": 674,
"preview": "#include <Rcpp.h>\nusing namespace Rcpp;\n\n\n// [[Rcpp::export]]\nRcpp::NumericVector locf(NumericVector x, bool reverse) \n{"
},
{
"path": "src/ma.cpp",
"chars": 3093,
"preview": "#include <Rcpp.h>\nusing namespace Rcpp;\n\n\nstruct pow_wrapper {\n public: double operator()(double a, double b) {\n ret"
},
{
"path": "tests/testthat/test-apply_base_algorithm.R",
"chars": 171,
"preview": "context(\"apply_base_algorithm\")\n\ntest_that(\"Warning for wrong algorithm choice\", {\n expect_error(\n apply_base_algori"
},
{
"path": "tests/testthat/test-depreciated_defunct.R",
"chars": 1094,
"preview": "context(\"Defunct and Depreciated Functions\")\n\n\n\ntest_that(\"Correct error for old, defunct plotting functions\", {\n expec"
},
{
"path": "tests/testthat/test-error_handling.R",
"chars": 2364,
"preview": "\ncontext(\"error-handling\")\n# These test are to make sure, the right errors / warnings are given \n# for wrong input data "
},
{
"path": "tests/testthat/test-ggplot_na_distribution.R",
"chars": 7021,
"preview": "context(\"ggplot_na_distribution\")\n\ntest_that(\"Old functions give error\", {\n expect_error(plotNA.distribution(tsAirgap))"
},
{
"path": "tests/testthat/test-ggplot_na_distribution2.R",
"chars": 5856,
"preview": "context(\"ggplot_na_distribution2\")\n\n\ntest_that(\"Old functions give error\", {\n expect_error(plotNA.distributionBar(tsAir"
},
{
"path": "tests/testthat/test-ggplot_na_gapsize.R",
"chars": 6495,
"preview": "context(\"ggplot_na_gapsize\")\n\n\ntest_that(\"Old functions give error\", {\n expect_error(plotNA.gapsize(tsAirgap))\n})\n\ntest"
},
{
"path": "tests/testthat/test-ggplot_na_gapsize2.R",
"chars": 7681,
"preview": "context(\"ggplot_na_gapsize2\")\n\n\ntest_that(\"Check that all parameters of plot run without error\", {\n if (!requireNamesp"
},
{
"path": "tests/testthat/test-ggplot_na_imputations.R",
"chars": 8073,
"preview": "context(\"ggplot_na_imputations\")\n\ntest_that(\"Old functions give error\", {\n imp_mean <- na_mean(tsAirgap)\n expect_error"
},
{
"path": "tests/testthat/test-input-na_advanced-tsObjects.R",
"chars": 8962,
"preview": "context(\"Advanced Time Series Objects Input\")\n\n\ntest_that(\"tsibble objects\", {\n skip_on_cran()\n if (!requireNamespace("
},
{
"path": "tests/testthat/test-na_interpolation.R",
"chars": 4319,
"preview": "context(\"na_interpolation\")\n\ntest_that(\"All NA vector throws error\", {\n expect_error(na_interpolation(c(NA, NA, NA, NA,"
},
{
"path": "tests/testthat/test-na_kalman.R",
"chars": 3940,
"preview": "context(\"na_kalman\")\n\ntest_that(\"All NA vector throws error\", {\n expect_error(na_kalman(c(NA, NA, NA, NA, NA)))\n})\n\ntes"
},
{
"path": "tests/testthat/test-na_locf.R",
"chars": 6547,
"preview": "context(\"na_locf\")\n\ntest_that(\"All NA vector throws error\", {\n expect_error(na_locf(c(NA, NA, NA, NA, NA)))\n})\n\ntest_th"
},
{
"path": "tests/testthat/test-na_ma.R",
"chars": 6158,
"preview": "context(\"na_ma\")\n\ntest_that(\"All NA vector throws error\", {\n expect_error(na_ma(c(NA, NA, NA, NA, NA)))\n})\n\ntest_that(\""
},
{
"path": "tests/testthat/test-na_mean.R",
"chars": 3265,
"preview": "context(\"na_mean\")\n\ntest_that(\"All NA vector throws error\", {\n expect_error(na_mean(c(NA, NA, NA, NA, NA)))\n})\n\ntest_th"
},
{
"path": "tests/testthat/test-na_random.R",
"chars": 1695,
"preview": "context(\"na_random\")\n\ntest_that(\"All NA vector throws error\", {\n expect_error(na_random(c(NA, NA, NA, NA, NA)))\n})\n\ntes"
},
{
"path": "tests/testthat/test-na_remove.R",
"chars": 1829,
"preview": "context(\"na_remove\")\n\ntest_that(\"All NA vector throws error\", {\n expect_error(na_remove(c(NA, NA, NA, NA, NA)))\n})\n\ntes"
},
{
"path": "tests/testthat/test-na_replace.R",
"chars": 2758,
"preview": "context(\"na_replace\")\n\n\ntest_that(\"All NA vector throws no error\", {\n expect_equal(sum(na_replace(c(NA, NA, NA, NA, NA)"
},
{
"path": "tests/testthat/test-na_seadec.R",
"chars": 7822,
"preview": "context(\"na_seadec\")\n\ntest_that(\"All NA vector throws error\", {\n expect_error(na_seadec(c(NA, NA, NA, NA, NA)))\n})\n\ntes"
},
{
"path": "tests/testthat/test-na_seasplit.R",
"chars": 7746,
"preview": "context(\"na_seasplit\")\n\ntest_that(\"All NA vector throws error\", {\n expect_error(na_seasplit(c(NA, NA, NA, NA, NA)))\n})\n"
},
{
"path": "tests/testthat/test-parameter-maxgap.R",
"chars": 2671,
"preview": "context(\"maxgap\")\n\n\ntest_that(\"Test that function works and prints output\", {\n x <- tsAirgap\n x[4] <- NA\n x[144] <- N"
},
{
"path": "tests/testthat/test-statsNA.R",
"chars": 667,
"preview": "context(\"statsNA\")\n\n\ntest_that(\"Test that function works and prints output\", {\n expect_output(statsNA(tsAirgap, print_o"
},
{
"path": "tests/testthat.R",
"chars": 41,
"preview": "library(testthat)\ntest_check(\"imputeTS\")\n"
},
{
"path": "vignettes/Cheat_Sheet_imputeTS.pdf.asis",
"chars": 174,
"preview": "%\\VignetteIndexEntry{Cheat Sheet imputeTS}\n%\\VignetteEngine{R.rsp::asis}\n%\\VignetteKeyword{PDF}\n%\\VignetteKeyword{HTML}\n"
},
{
"path": "vignettes/RJournal.sty",
"chars": 11495,
"preview": "% Package `RJournal' to use with LaTeX2e\n% Copyright (C) 2010 by the R Foundation\n% Copyright (C) 2013 by the R Journal\n"
},
{
"path": "vignettes/gallery_visualizations.Rmd",
"chars": 7668,
"preview": "---\ntitle: \"Gallery: Times Series Missing Data Visualizations\"\nauthor: \"Steffen Moritz\"\ndate: \"`r Sys.Date()`\"\noutput: r"
},
{
"path": "vignettes/imputeTS-Time-Series-Missing-Value-Imputation-in-R.ltx",
"chars": 40650,
"preview": "%\\VignetteIndexEntry{imputeTS: Time Series Missing Value Imputation in R}\n%\\VignetteEngine{R.rsp::tex}\n\\documentclass[a4"
}
]
// ... and 8 more files (download for full content)
About this extraction
This page contains the full source code of the SteffenMoritz/imputeTS GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 180 files (1.2 MB), approximately 366.3k tokens, and a symbol index with 12 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.