Repository: rust-ndarray/ndarray-stats Branch: master Commit: 67d0292f483c Files: 35 Total size: 236.6 KB Directory structure: gitextract_4oek7nb6/ ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ └── bug_report.md │ └── workflows/ │ ├── ci.yml │ └── latest-deps.yml ├── .gitignore ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── benches/ │ ├── deviation.rs │ ├── sort.rs │ └── summary_statistics.rs ├── codecov.yml ├── src/ │ ├── correlation.rs │ ├── deviation.rs │ ├── entropy.rs │ ├── errors.rs │ ├── histogram/ │ │ ├── bins.rs │ │ ├── errors.rs │ │ ├── grid.rs │ │ ├── histograms.rs │ │ ├── mod.rs │ │ └── strategies.rs │ ├── lib.rs │ ├── maybe_nan/ │ │ ├── impl_not_none.rs │ │ └── mod.rs │ ├── quantile/ │ │ ├── interpolate.rs │ │ └── mod.rs │ ├── sort.rs │ └── summary_statistics/ │ ├── means.rs │ └── mod.rs └── tests/ ├── deviation.rs ├── maybe_nan.rs ├── quantile.rs ├── sort.rs └── summary_statistics.rs ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Create a bug report for ndarray-stats title: '' labels: '' assignees: '' --- **Description** Description of the bug. **Version Information** - `ndarray`: ??? - `ndarray-stats`: ??? - Rust: ??? Please make sure that: - the version of `ndarray-stats` you're using corresponds to the version of `ndarray` you're using - the version of the Rust compiler you're using is supported by the version of `ndarray-stats` you're using (See the "Releases" section of the README for correct version information.) **To Reproduce** Example code which doesn't work. **Expected behavior** Description of what you expected to happen. **Additional context** Add any other context about the problem here. 
================================================ FILE: .github/workflows/ci.yml ================================================ name: Continuous integration on: push: branches: [ master ] pull_request: branches: [ master ] env: CARGO_TERM_COLOR: always RUSTFLAGS: "-D warnings" jobs: test: runs-on: ubuntu-latest strategy: matrix: rust: - stable - beta - nightly - 1.65.0 # MSRV steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@master with: toolchain: ${{ matrix.rust }} - name: Build run: cargo build --verbose - name: Run tests run: cargo test --verbose cross_test: runs-on: ubuntu-latest strategy: matrix: include: # 64-bit, big-endian - rust: stable target: s390x-unknown-linux-gnu # 32-bit, little-endian - rust: stable target: i686-unknown-linux-gnu steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@master with: toolchain: ${{ matrix.rust }} target: ${{ matrix.target }} - name: Install cross run: cargo install cross -f - name: Build run: cross build --verbose --target=${{ matrix.target }} - name: Run tests run: cross test --verbose --target=${{ matrix.target }} format: runs-on: ubuntu-latest strategy: matrix: rust: - stable steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: ${{ matrix.rust }} override: true components: rustfmt - name: Rustfmt run: cargo fmt -- --check coverage: runs-on: ubuntu-latest strategy: matrix: rust: - nightly steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@master with: toolchain: ${{ matrix.rust }} - name: Install tarpaulin uses: taiki-e/cache-cargo-install-action@v2 with: tool: cargo-tarpaulin - name: Generate code coverage run: cargo tarpaulin --verbose --all-features --workspace --timeout 120 --out Xml - name: Upload to codecov.io uses: codecov/codecov-action@v4 env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} with: fail_ci_if_error: true ================================================ FILE: .github/workflows/latest-deps.yml 
================================================ name: Check Latest Dependencies on: schedule: # Chosen so that it runs right before the international date line experiences the weekend. # Since we're open source, that means globally we should be aware of it right when we have the most # time to fix it. # # Sorry if this ruins your weekend, future maintainer... - cron: '0 12 * * FRI' workflow_dispatch: # For running manually pull_request: paths: - '.github/workflows/latest-deps.yml' env: CARGO_TERM_COLOR: always HOST: x86_64-unknown-linux-gnu RUSTFLAGS: "-D warnings" MSRV: 1.65.0 jobs: latest_deps_stable: runs-on: ubuntu-latest name: Check Latest Dependencies on Stable steps: - name: Check Out Repo uses: actions/checkout@v4 - name: Install Rust uses: dtolnay/rust-toolchain@master with: toolchain: stable - name: Setup Mold Linker uses: rui314/setup-mold@v1 - name: Setup Rust Cache uses: Swatinem/rust-cache@v2 - name: Install nextest uses: taiki-e/install-action@nextest - name: Ensure latest dependencies run: cargo update - name: Run Tests run: cargo nextest run latest_deps_msrv: runs-on: ubuntu-latest name: Check Latest Dependencies on MSRV steps: - name: Check Out Repo uses: actions/checkout@v4 - name: Install Stable Rust for Update uses: dtolnay/rust-toolchain@master with: toolchain: stable - name: Setup Mold Linker uses: rui314/setup-mold@v1 - name: Setup Rust Cache uses: Swatinem/rust-cache@v2 - name: Install nextest uses: taiki-e/install-action@nextest - name: Ensure latest dependencies # The difference is here between this and `latest_deps_stable` run: CARGO_RESOLVER_INCOMPATIBLE_RUST_VERSIONS="fallback" cargo update - name: Install MSRV Rust for Test uses: dtolnay/rust-toolchain@master with: toolchain: ${{ env.MSRV }} - name: Run Tests run: cargo nextest run ================================================ FILE: .gitignore ================================================ /target **/*.rs.bk # IDE-related tags rusty-tags.vi .vscode 
================================================ FILE: Cargo.toml ================================================ [package] name = "ndarray-stats" version = "0.7.0" authors = [ "Jim Turner ", "LukeMathWalker ", ] edition = "2018" rust-version = "1.65.0" license = "MIT/Apache-2.0" repository = "https://github.com/rust-ndarray/ndarray-stats" documentation = "https://docs.rs/ndarray-stats/" readme = "README.md" description = "Statistical routines for the n-dimensional array data structures provided by ndarray." keywords = ["array", "multidimensional", "statistics", "matrix", "ndarray"] categories = ["data-structures", "science"] [dependencies] ndarray = "0.17.1" noisy_float = "0.2.0" num-integer = "0.1" num-traits = "0.2" rand = "0.8.3" itertools = { version = "0.13", default-features = false } indexmap = "2.4" [dev-dependencies] ndarray = { version = "0.17.1", features = ["approx"] } criterion = "0.5.1" quickcheck = { version = "0.9.2", default-features = false } ndarray-rand = "0.16.0" approx = "0.5" quickcheck_macros = "1.0.0" num-bigint = "0.4.0" [[bench]] name = "sort" harness = false [[bench]] name = "summary_statistics" harness = false [[bench]] name = "deviation" harness = false ================================================ FILE: LICENSE-APACHE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: LICENSE-MIT ================================================ Copyright 2018–2024 ndarray-stats developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: README.md ================================================ # ndarray-stats [![Coverage](https://codecov.io/gh/rust-ndarray/ndarray-stats/branch/master/graph/badge.svg)](https://codecov.io/gh/rust-ndarray/ndarray-stats) [![Dependencies status](https://deps.rs/repo/github/rust-ndarray/ndarray-stats/status.svg)](https://deps.rs/repo/github/rust-ndarray/ndarray-stats) [![Crate](https://img.shields.io/crates/v/ndarray-stats.svg)](https://crates.io/crates/ndarray-stats) [![Documentation](https://docs.rs/ndarray-stats/badge.svg)](https://docs.rs/ndarray-stats) This crate provides statistical methods for [`ndarray`]'s `ArrayRef` type. Currently available routines include: - order statistics (minimum, maximum, median, quantiles, etc.); - summary statistics (mean, skewness, kurtosis, central moments, etc.); - partitioning; - correlation analysis (covariance, Pearson correlation); - measures from information theory (entropy, KL divergence, etc.); - deviation functions (distances, counts, errors, etc.); - histogram computation. See the [documentation](https://docs.rs/ndarray-stats) for more information. Please feel free to contribute new functionality! A roadmap can be found [here](https://github.com/rust-ndarray/ndarray-stats/issues/1). [`ndarray`]: https://github.com/rust-ndarray/ndarray ## Using with Cargo ```toml [dependencies] ndarray = "0.17.1" ndarray-stats = "0.7.0" ``` ## Releases * **0.7.0** * Breaking changes * Minimum supported Rust version: `1.65.0` * Updated to `ndarray:v0.17.1` * **0.6.0** * Breaking changes * Minimum supported Rust version: `1.64.0` * Updated to `ndarray:v0.16.0` * Updated to `approx:v0.5.0` * Updated to `ndarray-rand:v0.15.0` * Updated to `indexmap:v2.4` * Updated to `itertools:v0.13` *Contributors*: [@bluss](https://github.com/bluss) * **0.5.1** * Fixed bug in implementation of `MaybeNaN::remove_nan_mut` for `f32` and `f64` for views with non-standard layouts. 
Before this fix, the bug could cause incorrect results, buffer overflows, etc., in this method and others which use it. Thanks to [@JacekCzupyt](https://github.com/JacekCzupyt) for reporting the issue (#89). * Minor docs improvements. *Contributors*: [@jturner314](https://github.com/jturner314), [@BenMoon](https://github.com/BenMoon) * **0.5.0** * Breaking changes * Minimum supported Rust version: `1.49.0` * Updated to `ndarray:v0.15.0` *Contributors*: [@Armavica](https://github.com/armavica), [@cassiersg](https://github.com/cassiersg) * **0.4.0** * Breaking changes * Minimum supported Rust version: `1.42.0` * New functionality: * Summary statistics: * Weighted variance * Weighted standard deviation * Improvements / breaking changes: * Documentation improvements for Histograms * Updated to `ndarray:v0.14.0` *Contributors*: [@munckymagik](https://github.com/munckymagik), [@nilgoyette](https://github.com/nilgoyette), [@LukeMathWalker](https://github.com/LukeMathWalker), [@lebensterben](https://github.com/lebensterben), [@xd009642](https://github.com/xd009642) * **0.3.0** * Breaking changes * Minimum supported Rust version: `1.37` * New functionality: * Deviation functions: * Counts equal/unequal * `l1`, `l2`, `linf` distances * (Root) mean squared error * Peak signal-to-noise ratio * Summary statistics: * Weighted sum * Weighted mean * Improvements / breaking changes: * Updated to `ndarray:v0.13.0` *Contributors*: [@munckymagik](https://github.com/munckymagik), [@nilgoyette](https://github.com/nilgoyette), [@jturner314](https://github.com/jturner314), [@LukeMathWalker](https://github.com/LukeMathWalker) * **0.2.0** * Breaking changes * All `ndarray-stats`' extension traits are now impossible to implement by users of the library (see [#34]) * Redesigned error handling across the whole crate, standardising on `Result` * New functionality: * Summary statistics: * Harmonic mean * Geometric mean * Central moments * Kurtosis * Skewness * Information theory: * Entropy * 
Cross-entropy * Kullback-Leibler divergence * Quantiles and order statistics: * `argmin` / `argmin_skipnan` * `argmax` / `argmax_skipnan` * Optimized bulk quantile computation (`quantiles_mut`, `quantiles_axis_mut`) * Fixes: * Reduced occurrences of overflow for `interpolate::midpoint` *Contributors*: [@jturner314](https://github.com/jturner314), [@LukeMathWalker](https://github.com/LukeMathWalker), [@phungleson](https://github.com/phungleson), [@munckymagik](https://github.com/munckymagik) [#34]: https://github.com/rust-ndarray/ndarray-stats/issues/34 * **0.1.0** * Initial release by @LukeMathWalker and @jturner314. ## Contributing Please feel free to create issues and submit PRs. ## License Copyright 2018–2024 `ndarray-stats` developers Licensed under the [Apache License, Version 2.0](LICENSE-APACHE), or the [MIT license](LICENSE-MIT), at your option. You may not use this project except in compliance with those terms. ================================================ FILE: benches/deviation.rs ================================================ use criterion::{ black_box, criterion_group, criterion_main, AxisScale, Criterion, PlotConfiguration, }; use ndarray::prelude::*; use ndarray_rand::rand_distr::Uniform; use ndarray_rand::RandomExt; use ndarray_stats::DeviationExt; fn sq_l2_dist(c: &mut Criterion) { let lens = vec![10, 100, 1000, 10000]; let mut group = c.benchmark_group("sq_l2_dist"); group.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); for len in &lens { group.bench_with_input(format!("{}", len), len, |b, &len| { let data = Array::random(len, Uniform::new(0.0, 1.0).unwrap()); let data2 = Array::random(len, Uniform::new(0.0, 1.0).unwrap()); b.iter(|| black_box(data.sq_l2_dist(&data2).unwrap())) }); } group.finish(); } criterion_group! 
{ name = benches; config = Criterion::default(); targets = sq_l2_dist } criterion_main!(benches); ================================================ FILE: benches/sort.rs ================================================ use criterion::{ black_box, criterion_group, criterion_main, AxisScale, BatchSize, Criterion, PlotConfiguration, }; use ndarray::prelude::*; use ndarray_stats::Sort1dExt; use rand::prelude::*; fn get_from_sorted_mut(c: &mut Criterion) { let lens = vec![10, 100, 1000, 10000]; let mut group = c.benchmark_group("get_from_sorted_mut"); group.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); for len in &lens { group.bench_with_input(format!("{}", len), len, |b, &len| { let mut rng = StdRng::seed_from_u64(42); let mut data: Vec<_> = (0..len).collect(); data.shuffle(&mut rng); let indices: Vec<_> = (0..len).step_by(len / 10).collect(); b.iter_batched( || Array1::from(data.clone()), |mut arr| { for &i in &indices { black_box(arr.get_from_sorted_mut(i)); } }, BatchSize::SmallInput, ) }); } group.finish(); } fn get_many_from_sorted_mut(c: &mut Criterion) { let lens = vec![10, 100, 1000, 10000]; let mut group = c.benchmark_group("get_many_from_sorted_mut"); group.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); for len in &lens { group.bench_with_input(format!("{}", len), len, |b, &len| { let mut rng = StdRng::seed_from_u64(42); let mut data: Vec<_> = (0..len).collect(); data.shuffle(&mut rng); let indices: Array1<_> = (0..len).step_by(len / 10).collect(); b.iter_batched( || Array1::from(data.clone()), |mut arr| { black_box(arr.get_many_from_sorted_mut(&indices)); }, BatchSize::SmallInput, ) }); } group.finish(); } criterion_group! 
{ name = benches; config = Criterion::default(); targets = get_from_sorted_mut, get_many_from_sorted_mut } criterion_main!(benches); ================================================ FILE: benches/summary_statistics.rs ================================================ use criterion::{ black_box, criterion_group, criterion_main, AxisScale, BatchSize, Criterion, PlotConfiguration, }; use ndarray::prelude::*; use ndarray_rand::rand_distr::Uniform; use ndarray_rand::RandomExt; use ndarray_stats::SummaryStatisticsExt; fn weighted_std(c: &mut Criterion) { let lens = vec![10, 100, 1000, 10000]; let mut group = c.benchmark_group("weighted_std"); group.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); for len in &lens { group.bench_with_input(format!("{}", len), len, |b, &len| { let data = Array::random(len, Uniform::new(0.0, 1.0).unwrap()); let mut weights = Array::random(len, Uniform::new(0.0, 1.0).unwrap()); weights /= weights.sum(); b.iter_batched( || data.clone(), |arr| { black_box(arr.weighted_std(&weights, 0.0).unwrap()); }, BatchSize::SmallInput, ) }); } group.finish(); } criterion_group! { name = benches; config = Criterion::default(); targets = weighted_std } criterion_main!(benches); ================================================ FILE: codecov.yml ================================================ comment: off coverage: status: project: default: target: auto threshold: 2 base: auto patch: default: target: auto threshold: 2 base: auto ================================================ FILE: src/correlation.rs ================================================ use crate::errors::EmptyInput; use ndarray::prelude::*; use num_traits::{Float, FromPrimitive}; /// Extension trait for `ndarray` providing functions /// to compute different correlation measures. pub trait CorrelationExt { /// Return the covariance matrix `C` for a 2-dimensional /// array of observations `M`. 
/// /// Let `(r, o)` be the shape of `M`: /// - `r` is the number of random variables; /// - `o` is the number of observations we have collected /// for each random variable. /// /// Every column in `M` is an experiment: a single observation for each /// random variable. /// Each row in `M` contains all the observations for a certain random variable. /// /// The parameter `ddof` specifies the "delta degrees of freedom". For /// example, to calculate the population covariance, use `ddof = 0`, or to /// calculate the sample covariance (unbiased estimate), use `ddof = 1`. /// /// The covariance of two random variables is defined as: /// /// ```text /// 1 n /// cov(X, Y) = ―――――――― ∑ (xᵢ - x̅)(yᵢ - y̅) /// n - ddof i=1 /// ``` /// /// where /// /// ```text /// 1 n /// x̅ = ― ∑ xᵢ /// n i=1 /// ``` /// and similarly for y̅. /// /// If `M` is empty (either zero observations or zero random variables), it returns `Err(EmptyInput)`. /// /// **Panics** if `ddof` is greater than or equal to the number of /// observations, or if the type cast of `n_observations` from `usize` to `A` fails. /// /// # Example /// /// ``` /// use ndarray::{aview2, arr2}; /// use ndarray_stats::CorrelationExt; /// /// let a = arr2(&[[1., 3., 5.], /// [2., 4., 6.]]); /// let covariance = a.cov(1.).unwrap(); /// assert_eq!( /// covariance, /// aview2(&[[4., 4.], [4., 4.]]) /// ); /// ``` fn cov(&self, ddof: A) -> Result, EmptyInput> where A: Float + FromPrimitive; /// Return the [Pearson correlation coefficients](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient) /// for a 2-dimensional array of observations `M`. /// /// Let `(r, o)` be the shape of `M`: /// - `r` is the number of random variables; /// - `o` is the number of observations we have collected /// for each random variable. /// /// Every column in `M` is an experiment: a single observation for each /// random variable. /// Each row in `M` contains all the observations for a certain random variable. 
/// /// The Pearson correlation coefficient of two random variables is defined as: /// /// ```text /// cov(X, Y) /// rho(X, Y) = ―――――――――――― /// std(X)std(Y) /// ``` /// /// Let `R` be the matrix returned by this function. Then /// ```text /// R_ij = rho(X_i, X_j) /// ``` /// /// If `M` is empty (either zero observations or zero random variables), it returns `Err(EmptyInput)`. /// /// **Panics** if the type cast of `n_observations` from `usize` to `A` fails or /// if the standard deviation of one of the random variables is zero and /// division by zero panics for type A. /// /// # Example /// /// ``` /// use approx; /// use ndarray::arr2; /// use ndarray_stats::CorrelationExt; /// use approx::AbsDiffEq; /// /// let a = arr2(&[[1., 3., 5.], /// [2., 4., 6.]]); /// let corr = a.pearson_correlation().unwrap(); /// let epsilon = 1e-7; /// assert!( /// corr.abs_diff_eq( /// &arr2(&[ /// [1., 1.], /// [1., 1.], /// ]), /// epsilon /// ) /// ); /// ``` fn pearson_correlation(&self) -> Result, EmptyInput> where A: Float + FromPrimitive; private_decl! {} } impl CorrelationExt for ArrayRef2 { fn cov(&self, ddof: A) -> Result, EmptyInput> where A: Float + FromPrimitive, { let observation_axis = Axis(1); let n_observations = A::from_usize(self.len_of(observation_axis)).unwrap(); let dof = if ddof >= n_observations { panic!( "`ddof` needs to be strictly smaller than the \ number of observations provided for each \ random variable!" 
) } else { n_observations - ddof }; let mean = self.mean_axis(observation_axis); match mean { Some(mean) => { let denoised = self - mean.insert_axis(observation_axis); let covariance = denoised.dot(&denoised.t()); Ok(covariance.mapv_into(|x| x / dof)) } None => Err(EmptyInput), } } fn pearson_correlation(&self) -> Result, EmptyInput> where A: Float + FromPrimitive, { match self.dim() { (n, m) if n > 0 && m > 0 => { let observation_axis = Axis(1); // The ddof value doesn't matter, as long as we use the same one // for computing covariance and standard deviation // We choose 0 as it is the smallest number admitted by std_axis let ddof = A::zero(); let cov = self.cov(ddof).unwrap(); let std = self .std_axis(observation_axis, ddof) .insert_axis(observation_axis); let std_matrix = std.dot(&std.t()); // element-wise division Ok(cov / std_matrix) } _ => Err(EmptyInput), } } private_impl! {} } #[cfg(test)] mod cov_tests { use super::*; use ndarray::array; use ndarray_rand::rand; use ndarray_rand::rand_distr::Uniform; use ndarray_rand::RandomExt; use quickcheck_macros::quickcheck; #[quickcheck] fn constant_random_variables_have_zero_covariance_matrix(value: f64) -> bool { let n_random_variables = 3; let n_observations = 4; let a = Array::from_elem((n_random_variables, n_observations), value); abs_diff_eq!( a.cov(1.).unwrap(), &Array::zeros((n_random_variables, n_random_variables)), epsilon = 1e-8, ) } #[quickcheck] fn covariance_matrix_is_symmetric(bound: f64) -> bool { let n_random_variables = 3; let n_observations = 4; let a = Array::random( (n_random_variables, n_observations), Uniform::new(-bound.abs(), bound.abs()).unwrap(), ); let covariance = a.cov(1.).unwrap(); abs_diff_eq!(covariance, &covariance.t(), epsilon = 1e-8) } #[test] #[should_panic] fn test_invalid_ddof() { let n_random_variables = 3; let n_observations = 4; let a = Array::random( (n_random_variables, n_observations), Uniform::new(0., 10.).unwrap(), ); let invalid_ddof = (n_observations as f64) + 
rand::random::<f64>().abs();
        let _ = a.cov(invalid_ddof);
    }

    #[test]
    fn test_covariance_zero_variables() {
        let a = Array2::<f32>::zeros((0, 2));
        let cov = a.cov(1.);
        assert!(cov.is_ok());
        assert_eq!(cov.unwrap().shape(), &[0, 0]);
    }

    #[test]
    fn test_covariance_zero_observations() {
        let a = Array2::<f32>::zeros((2, 0));
        // Negative ddof (-1 < 0) to avoid invalid-ddof panic
        let cov = a.cov(-1.);
        assert_eq!(cov, Err(EmptyInput));
    }

    #[test]
    fn test_covariance_zero_variables_zero_observations() {
        let a = Array2::<f32>::zeros((0, 0));
        // Negative ddof (-1 < 0) to avoid invalid-ddof panic
        let cov = a.cov(-1.);
        assert_eq!(cov, Err(EmptyInput));
    }

    #[test]
    fn test_covariance_for_random_array() {
        let a = array![
            [0.72009497, 0.12568055, 0.55705966, 0.5959984, 0.69471457],
            [0.56717131, 0.47619486, 0.21526298, 0.88915366, 0.91971245],
            [0.59044195, 0.10720363, 0.76573717, 0.54693675, 0.95923036],
            [0.24102952, 0.131347, 0.11118028, 0.21451351, 0.30515539],
            [0.26952473, 0.93079841, 0.8080893, 0.42814155, 0.24642258]
        ];
        let numpy_covariance = array![
            [0.05786248, 0.02614063, 0.06446215, 0.01285105, -0.06443992],
            [0.02614063, 0.08733569, 0.02436933, 0.01977437, -0.06715555],
            [0.06446215, 0.02436933, 0.10052129, 0.01393589, -0.06129912],
            [0.01285105, 0.01977437, 0.01393589, 0.00638795, -0.02355557],
            [
                -0.06443992,
                -0.06715555,
                -0.06129912,
                -0.02355557,
                0.09909855
            ]
        ];
        assert_eq!(a.ndim(), 2);
        assert_abs_diff_eq!(a.cov(1.).unwrap(), &numpy_covariance, epsilon = 1e-8);
    }

    #[test]
    #[should_panic]
    // We lose precision, hence the failing assert
    fn test_covariance_for_badly_conditioned_array() {
        let a: Array2<f64> = array![[1e12 + 1., 1e12 - 1.], [1e-6 + 1e-12, 1e-6 - 1e-12],];
        let expected_covariance = array![[2., 2e-12], [2e-12, 2e-24]];
        assert_abs_diff_eq!(a.cov(1.).unwrap(), &expected_covariance, epsilon = 1e-24);
    }
}

#[cfg(test)]
mod pearson_correlation_tests {
    use super::*;
    use ndarray::array;
    use ndarray::Array;
    use ndarray_rand::rand_distr::Uniform;
    use ndarray_rand::RandomExt;
    use quickcheck_macros::quickcheck;

    #[quickcheck]
    fn output_matrix_is_symmetric(bound: f64) -> bool {
        let n_random_variables = 3;
        let n_observations = 4;
        let a = Array::random(
            (n_random_variables, n_observations),
            Uniform::new(-bound.abs(), bound.abs()).unwrap(),
        );
        let pearson_correlation = a.pearson_correlation().unwrap();
        abs_diff_eq!(
            pearson_correlation.view(),
            pearson_correlation.t(),
            epsilon = 1e-8
        )
    }

    #[quickcheck]
    fn constant_random_variables_have_nan_correlation(value: f64) -> bool {
        let n_random_variables = 3;
        let n_observations = 4;
        let a = Array::from_elem((n_random_variables, n_observations), value);
        let pearson_correlation = a.pearson_correlation();
        // A constant variable has zero standard deviation, so every entry is 0/0.
        pearson_correlation.unwrap().iter().all(|x| x.is_nan())
    }

    #[test]
    fn test_zero_variables() {
        let a = Array2::<f32>::zeros((0, 2));
        let pearson_correlation = a.pearson_correlation();
        assert_eq!(pearson_correlation, Err(EmptyInput))
    }

    #[test]
    fn test_zero_observations() {
        let a = Array2::<f32>::zeros((2, 0));
        let pearson = a.pearson_correlation();
        assert_eq!(pearson, Err(EmptyInput));
    }

    #[test]
    fn test_zero_variables_zero_observations() {
        let a = Array2::<f32>::zeros((0, 0));
        let pearson = a.pearson_correlation();
        assert_eq!(pearson, Err(EmptyInput));
    }

    #[test]
    fn test_for_random_array() {
        let a = array![
            [0.16351516, 0.56863268, 0.16924196, 0.72579120],
            [0.44342453, 0.19834387, 0.25411802, 0.62462382],
            [0.97162731, 0.29958849, 0.17338142, 0.80198342],
            [0.91727132, 0.79817799, 0.62237124, 0.38970998],
            [0.26979716, 0.20887228, 0.95454999, 0.96290785]
        ];
        let numpy_corrcoeff = array![
            [1., 0.38089376, 0.08122504, -0.59931623, 0.1365648],
            [0.38089376, 1., 0.80918429, -0.52615195, 0.38954398],
            [0.08122504, 0.80918429, 1., 0.07134906, -0.17324776],
            [-0.59931623, -0.52615195, 0.07134906, 1., -0.8743213],
            [0.1365648, 0.38954398, -0.17324776, -0.8743213, 1.]
        ];
        assert_eq!(a.ndim(), 2);
        assert_abs_diff_eq!(
            a.pearson_correlation().unwrap(),
            numpy_corrcoeff,
            epsilon = 1e-7
        );
    }
}

================================================
FILE: src/deviation.rs
================================================
use ndarray::{ArrayRef, Dimension, Zip};
use num_traits::{Signed, ToPrimitive};
use std::convert::Into;
use std::ops::AddAssign;

use crate::errors::MultiInputError;

/// An extension trait for `ndarray` providing functions
/// to compute different deviation measures.
pub trait DeviationExt<A, D>
where
    D: Dimension,
{
    /// Counts the number of indices at which the elements of the arrays `self`
    /// and `other` are equal.
    ///
    /// The following **errors** may be returned:
    ///
    /// * `MultiInputError::EmptyInput` if `self` is empty
    /// * `MultiInputError::ShapeMismatch` if `self` and `other` don't have the same shape
    fn count_eq(&self, other: &ArrayRef<A, D>) -> Result<usize, MultiInputError>
    where
        A: PartialEq;

    /// Counts the number of indices at which the elements of the arrays `self`
    /// and `other` are not equal.
    ///
    /// The following **errors** may be returned:
    ///
    /// * `MultiInputError::EmptyInput` if `self` is empty
    /// * `MultiInputError::ShapeMismatch` if `self` and `other` don't have the same shape
    fn count_neq(&self, other: &ArrayRef<A, D>) -> Result<usize, MultiInputError>
    where
        A: PartialEq;

    /// Computes the [squared L2 distance] between `self` and `other`.
    ///
    /// ```text
    ///  n
    ///  ∑  |aᵢ - bᵢ|²
    /// i=1
    /// ```
    ///
    /// where `self` is `a` and `other` is `b`.
    ///
    /// The following **errors** may be returned:
    ///
    /// * `MultiInputError::EmptyInput` if `self` is empty
    /// * `MultiInputError::ShapeMismatch` if `self` and `other` don't have the same shape
    ///
    /// [squared L2 distance]: https://en.wikipedia.org/wiki/Euclidean_distance#Squared_Euclidean_distance
    fn sq_l2_dist(&self, other: &ArrayRef<A, D>) -> Result<A, MultiInputError>
    where
        A: AddAssign + Clone + Signed;

    /// Computes the [L2 distance] between `self` and `other`.
///
    /// ```text
    ///      n
    /// √ (  ∑  |aᵢ - bᵢ|² )
    ///     i=1
    /// ```
    ///
    /// where `self` is `a` and `other` is `b`.
    ///
    /// The following **errors** may be returned:
    ///
    /// * `MultiInputError::EmptyInput` if `self` is empty
    /// * `MultiInputError::ShapeMismatch` if `self` and `other` don't have the same shape
    ///
    /// **Panics** if the type cast from `A` to `f64` fails.
    ///
    /// [L2 distance]: https://en.wikipedia.org/wiki/Euclidean_distance
    fn l2_dist(&self, other: &ArrayRef<A, D>) -> Result<f64, MultiInputError>
    where
        A: AddAssign + Clone + Signed + ToPrimitive;

    /// Computes the [L1 distance] between `self` and `other`.
    ///
    /// ```text
    ///  n
    ///  ∑  |aᵢ - bᵢ|
    /// i=1
    /// ```
    ///
    /// where `self` is `a` and `other` is `b`.
    ///
    /// The following **errors** may be returned:
    ///
    /// * `MultiInputError::EmptyInput` if `self` is empty
    /// * `MultiInputError::ShapeMismatch` if `self` and `other` don't have the same shape
    ///
    /// [L1 distance]: https://en.wikipedia.org/wiki/Taxicab_geometry
    fn l1_dist(&self, other: &ArrayRef<A, D>) -> Result<A, MultiInputError>
    where
        A: AddAssign + Clone + Signed;

    /// Computes the [L∞ distance] between `self` and `other`.
    ///
    /// ```text
    /// max(|aᵢ - bᵢ|)
    ///  ᵢ
    /// ```
    ///
    /// where `self` is `a` and `other` is `b`.
    ///
    /// The following **errors** may be returned:
    ///
    /// * `MultiInputError::EmptyInput` if `self` is empty
    /// * `MultiInputError::ShapeMismatch` if `self` and `other` don't have the same shape
    ///
    /// [L∞ distance]: https://en.wikipedia.org/wiki/Chebyshev_distance
    fn linf_dist(&self, other: &ArrayRef<A, D>) -> Result<A, MultiInputError>
    where
        A: Clone + PartialOrd + Signed;

    /// Computes the [mean absolute error] between `self` and `other`.
    ///
    /// ```text
    ///        n
    /// 1/n *  ∑  |aᵢ - bᵢ|
    ///       i=1
    /// ```
    ///
    /// where `self` is `a` and `other` is `b`.
    ///
    /// The following **errors** may be returned:
    ///
    /// * `MultiInputError::EmptyInput` if `self` is empty
    /// * `MultiInputError::ShapeMismatch` if `self` and `other` don't have the same shape
    ///
    /// **Panics** if the type cast from `A` to `f64` fails.
    ///
    /// [mean absolute error]: https://en.wikipedia.org/wiki/Mean_absolute_error
    fn mean_abs_err(&self, other: &ArrayRef<A, D>) -> Result<f64, MultiInputError>
    where
        A: AddAssign + Clone + Signed + ToPrimitive;

    /// Computes the [mean squared error] between `self` and `other`.
    ///
    /// ```text
    ///        n
    /// 1/n *  ∑  |aᵢ - bᵢ|²
    ///       i=1
    /// ```
    ///
    /// where `self` is `a` and `other` is `b`.
    ///
    /// The following **errors** may be returned:
    ///
    /// * `MultiInputError::EmptyInput` if `self` is empty
    /// * `MultiInputError::ShapeMismatch` if `self` and `other` don't have the same shape
    ///
    /// **Panics** if the type cast from `A` to `f64` fails.
    ///
    /// [mean squared error]: https://en.wikipedia.org/wiki/Mean_squared_error
    fn mean_sq_err(&self, other: &ArrayRef<A, D>) -> Result<f64, MultiInputError>
    where
        A: AddAssign + Clone + Signed + ToPrimitive;

    /// Computes the unnormalized [root-mean-square error] between `self` and `other`.
    ///
    /// ```text
    /// √ mse(a, b)
    /// ```
    ///
    /// where `self` is `a`, `other` is `b` and `mse` is the mean-squared-error.
    ///
    /// The following **errors** may be returned:
    ///
    /// * `MultiInputError::EmptyInput` if `self` is empty
    /// * `MultiInputError::ShapeMismatch` if `self` and `other` don't have the same shape
    ///
    /// **Panics** if the type cast from `A` to `f64` fails.
    ///
    /// [root-mean-square error]: https://en.wikipedia.org/wiki/Root-mean-square_deviation
    fn root_mean_sq_err(&self, other: &ArrayRef<A, D>) -> Result<f64, MultiInputError>
    where
        A: AddAssign + Clone + Signed + ToPrimitive;

    /// Computes the [peak signal-to-noise ratio] between `self` and `other`.
    ///
    /// ```text
    /// 10 * log10(maxv^2 / mse(a, b))
    /// ```
    ///
    /// where `self` is `a`, `other` is `b`, `mse` is the mean-squared-error
    /// and `maxv` is the maximum possible value either array can take.
    ///
    /// The following **errors** may be returned:
    ///
    /// * `MultiInputError::EmptyInput` if `self` is empty
    /// * `MultiInputError::ShapeMismatch` if `self` and `other` don't have the same shape
    ///
    /// **Panics** if the type cast from `A` to `f64` fails.
///
    /// [peak signal-to-noise ratio]: https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio
    fn peak_signal_to_noise_ratio(
        &self,
        other: &ArrayRef<A, D>,
        maxv: A,
    ) -> Result<f64, MultiInputError>
    where
        A: AddAssign + Clone + Signed + ToPrimitive;

    private_decl! {}
}

impl<A, D> DeviationExt<A, D> for ArrayRef<A, D>
where
    D: Dimension,
{
    fn count_eq(&self, other: &ArrayRef<A, D>) -> Result<usize, MultiInputError>
    where
        A: PartialEq,
    {
        return_err_if_empty!(self);
        return_err_unless_same_shape!(self, other);

        let mut count = 0;

        Zip::from(self).and(other).for_each(|a, b| {
            if a == b {
                count += 1;
            }
        });

        Ok(count)
    }

    fn count_neq(&self, other: &ArrayRef<A, D>) -> Result<usize, MultiInputError>
    where
        A: PartialEq,
    {
        // Every index is either equal or not, so the complement of `count_eq` suffices.
        self.count_eq(other).map(|n_eq| self.len() - n_eq)
    }

    fn sq_l2_dist(&self, other: &ArrayRef<A, D>) -> Result<A, MultiInputError>
    where
        A: AddAssign + Clone + Signed,
    {
        return_err_if_empty!(self);
        return_err_unless_same_shape!(self, other);

        let mut result = A::zero();

        Zip::from(self).and(other).for_each(|self_i, other_i| {
            let (a, b) = (self_i.clone(), other_i.clone());
            let diff = a - b;
            result += diff.clone() * diff;
        });

        Ok(result)
    }

    fn l2_dist(&self, other: &ArrayRef<A, D>) -> Result<f64, MultiInputError>
    where
        A: AddAssign + Clone + Signed + ToPrimitive,
    {
        let sq_l2_dist = self
            .sq_l2_dist(other)?
            .to_f64()
            .expect("failed cast from type A to f64");

        Ok(sq_l2_dist.sqrt())
    }

    fn l1_dist(&self, other: &ArrayRef<A, D>) -> Result<A, MultiInputError>
    where
        A: AddAssign + Clone + Signed,
    {
        return_err_if_empty!(self);
        return_err_unless_same_shape!(self, other);

        let mut result = A::zero();

        Zip::from(self).and(other).for_each(|self_i, other_i| {
            let (a, b) = (self_i.clone(), other_i.clone());
            result += (a - b).abs();
        });

        Ok(result)
    }

    fn linf_dist(&self, other: &ArrayRef<A, D>) -> Result<A, MultiInputError>
    where
        A: Clone + PartialOrd + Signed,
    {
        return_err_if_empty!(self);
        return_err_unless_same_shape!(self, other);

        // Absolute differences are never negative, so zero is a valid starting maximum.
        let mut max = A::zero();

        Zip::from(self).and(other).for_each(|self_i, other_i| {
            let (a, b) = (self_i.clone(), other_i.clone());
            let diff = (a - b).abs();
            if diff > max {
                max = diff;
            }
        });

        Ok(max)
    }

    fn mean_abs_err(&self, other: &ArrayRef<A, D>) -> Result<f64, MultiInputError>
    where
        A: AddAssign + Clone + Signed + ToPrimitive,
    {
        let l1_dist = self
            .l1_dist(other)?
            .to_f64()
            .expect("failed cast from type A to f64");
        let n = self.len() as f64;

        Ok(l1_dist / n)
    }

    fn mean_sq_err(&self, other: &ArrayRef<A, D>) -> Result<f64, MultiInputError>
    where
        A: AddAssign + Clone + Signed + ToPrimitive,
    {
        let sq_l2_dist = self
            .sq_l2_dist(other)?
            .to_f64()
            .expect("failed cast from type A to f64");
        let n = self.len() as f64;

        Ok(sq_l2_dist / n)
    }

    fn root_mean_sq_err(&self, other: &ArrayRef<A, D>) -> Result<f64, MultiInputError>
    where
        A: AddAssign + Clone + Signed + ToPrimitive,
    {
        let msd = self.mean_sq_err(other)?;
        Ok(msd.sqrt())
    }

    fn peak_signal_to_noise_ratio(
        &self,
        other: &ArrayRef<A, D>,
        maxv: A,
    ) -> Result<f64, MultiInputError>
    where
        A: AddAssign + Clone + Signed + ToPrimitive,
    {
        let maxv_f = maxv.to_f64().expect("failed cast from type A to f64");
        // `other` is already a reference; pass it through without re-borrowing.
        let msd = self.mean_sq_err(other)?;
        let psnr = 10. * f64::log10(maxv_f * maxv_f / msd);

        Ok(psnr)
    }

    private_impl! {}
}

================================================
FILE: src/entropy.rs
================================================
//! Information theory (e.g. entropy, KL divergence, etc.).
use crate::errors::{EmptyInput, MultiInputError, ShapeMismatch}; use ndarray::{Array, ArrayRef, Dimension, Zip}; use num_traits::Float; /// Extension trait for `ndarray` providing methods /// to compute information theory quantities /// (e.g. entropy, Kullback–Leibler divergence, etc.). pub trait EntropyExt where D: Dimension, { /// Computes the [entropy] *S* of the array values, defined as /// /// ```text /// n /// S = - ∑ xᵢ ln(xᵢ) /// i=1 /// ``` /// /// If the array is empty, `Err(EmptyInput)` is returned. /// /// **Panics** if `ln` of any element in the array panics (which can occur for negative values for some `A`). /// /// ## Remarks /// /// The entropy is a measure used in [Information Theory] /// to describe a probability distribution: it only make sense /// when the array values sum to 1, with each entry between /// 0 and 1 (extremes included). /// /// The array values are **not** normalised by this function before /// computing the entropy to avoid introducing potentially /// unnecessary numerical errors (e.g. if the array were to be already normalised). /// /// By definition, *xᵢ ln(xᵢ)* is set to 0 if *xᵢ* is 0. /// /// [entropy]: https://en.wikipedia.org/wiki/Entropy_(information_theory) /// [Information Theory]: https://en.wikipedia.org/wiki/Information_theory fn entropy(&self) -> Result where A: Float; /// Computes the [Kullback-Leibler divergence] *Dₖₗ(p,q)* between two arrays, /// where `self`=*p*. /// /// The Kullback-Leibler divergence is defined as: /// /// ```text /// n /// Dₖₗ(p,q) = - ∑ pᵢ ln(qᵢ/pᵢ) /// i=1 /// ``` /// /// If the arrays are empty, `Err(MultiInputError::EmptyInput)` is returned. /// If the array shapes are not identical, /// `Err(MultiInputError::ShapeMismatch)` is returned. /// /// **Panics** if, for a pair of elements *(pᵢ, qᵢ)* from *p* and *q*, computing /// *ln(qᵢ/pᵢ)* is a panic cause for `A`. 
/// /// ## Remarks /// /// The Kullback-Leibler divergence is a measure used in [Information Theory] /// to describe the relationship between two probability distribution: it only make sense /// when each array sums to 1 with entries between 0 and 1 (extremes included). /// /// The array values are **not** normalised by this function before /// computing the entropy to avoid introducing potentially /// unnecessary numerical errors (e.g. if the array were to be already normalised). /// /// By definition, *pᵢ ln(qᵢ/pᵢ)* is set to 0 if *pᵢ* is 0. /// /// [Kullback-Leibler divergence]: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence /// [Information Theory]: https://en.wikipedia.org/wiki/Information_theory fn kl_divergence(&self, q: &ArrayRef) -> Result where A: Float; /// Computes the [cross entropy] *H(p,q)* between two arrays, /// where `self`=*p*. /// /// The cross entropy is defined as: /// /// ```text /// n /// H(p,q) = - ∑ pᵢ ln(qᵢ) /// i=1 /// ``` /// /// If the arrays are empty, `Err(MultiInputError::EmptyInput)` is returned. /// If the array shapes are not identical, /// `Err(MultiInputError::ShapeMismatch)` is returned. /// /// **Panics** if any element in *q* is negative and taking the logarithm of a negative number /// is a panic cause for `A`. /// /// ## Remarks /// /// The cross entropy is a measure used in [Information Theory] /// to describe the relationship between two probability distributions: it only makes sense /// when each array sums to 1 with entries between 0 and 1 (extremes included). /// /// The array values are **not** normalised by this function before /// computing the entropy to avoid introducing potentially /// unnecessary numerical errors (e.g. if the array were to be already normalised). /// /// The cross entropy is often used as an objective/loss function in /// [optimization problems], including [machine learning]. /// /// By definition, *pᵢ ln(qᵢ)* is set to 0 if *pᵢ* is 0. 
/// /// [cross entropy]: https://en.wikipedia.org/wiki/Cross-entropy /// [Information Theory]: https://en.wikipedia.org/wiki/Information_theory /// [optimization problems]: https://en.wikipedia.org/wiki/Cross-entropy_method /// [machine learning]: https://en.wikipedia.org/wiki/Cross_entropy#Cross-entropy_error_function_and_logistic_regression fn cross_entropy(&self, q: &ArrayRef) -> Result where A: Float; private_decl! {} } impl EntropyExt for ArrayRef where D: Dimension, { fn entropy(&self) -> Result where A: Float, { if self.is_empty() { Err(EmptyInput) } else { let entropy = -self .mapv(|x| { if x == A::zero() { A::zero() } else { x * x.ln() } }) .sum(); Ok(entropy) } } fn kl_divergence(&self, q: &ArrayRef) -> Result where A: Float, { if self.is_empty() { return Err(MultiInputError::EmptyInput); } if self.shape() != q.shape() { return Err(ShapeMismatch { first_shape: self.shape().to_vec(), second_shape: q.shape().to_vec(), } .into()); } let mut temp = Array::zeros(self.raw_dim()); Zip::from(&mut temp) .and(self) .and(q) .for_each(|result, &p, &q| { *result = { if p == A::zero() { A::zero() } else { p * (q / p).ln() } } }); let kl_divergence = -temp.sum(); Ok(kl_divergence) } fn cross_entropy(&self, q: &ArrayRef) -> Result where A: Float, { if self.is_empty() { return Err(MultiInputError::EmptyInput); } if self.shape() != q.shape() { return Err(ShapeMismatch { first_shape: self.shape().to_vec(), second_shape: q.shape().to_vec(), } .into()); } let mut temp = Array::zeros(self.raw_dim()); Zip::from(&mut temp) .and(self) .and(q) .for_each(|result, &p, &q| { *result = { if p == A::zero() { A::zero() } else { p * q.ln() } } }); let cross_entropy = -temp.sum(); Ok(cross_entropy) } private_impl! 
{} } #[cfg(test)] mod tests { use super::EntropyExt; use crate::errors::{EmptyInput, MultiInputError}; use approx::assert_abs_diff_eq; use ndarray::{array, Array1}; use noisy_float::types::n64; use std::f64; #[test] fn test_entropy_with_nan_values() { let a = array![f64::NAN, 1.]; assert!(a.entropy().unwrap().is_nan()); } #[test] fn test_entropy_with_empty_array_of_floats() { let a: Array1 = array![]; assert_eq!(a.entropy(), Err(EmptyInput)); } #[test] fn test_entropy_with_array_of_floats() { // Array of probability values - normalized and positive. let a: Array1 = array![ 0.03602474, 0.01900344, 0.03510129, 0.03414964, 0.00525311, 0.03368976, 0.00065396, 0.02906146, 0.00063687, 0.01597306, 0.00787625, 0.00208243, 0.01450896, 0.01803418, 0.02055336, 0.03029759, 0.03323628, 0.01218822, 0.0001873, 0.01734179, 0.03521668, 0.02564429, 0.02421992, 0.03540229, 0.03497635, 0.03582331, 0.026558, 0.02460495, 0.02437716, 0.01212838, 0.00058464, 0.00335236, 0.02146745, 0.00930306, 0.01821588, 0.02381928, 0.02055073, 0.01483779, 0.02284741, 0.02251385, 0.00976694, 0.02864634, 0.00802828, 0.03464088, 0.03557152, 0.01398894, 0.01831756, 0.0227171, 0.00736204, 0.01866295, ]; // Computed using scipy.stats.entropy let expected_entropy = 3.721606155686918; assert_abs_diff_eq!(a.entropy().unwrap(), expected_entropy, epsilon = 1e-6); } #[test] fn test_cross_entropy_and_kl_with_nan_values() -> Result<(), MultiInputError> { let a = array![f64::NAN, 1.]; let b = array![2., 1.]; assert!(a.cross_entropy(&b)?.is_nan()); assert!(b.cross_entropy(&a)?.is_nan()); assert!(a.kl_divergence(&b)?.is_nan()); assert!(b.kl_divergence(&a)?.is_nan()); Ok(()) } #[test] fn test_cross_entropy_and_kl_with_same_n_dimension_but_different_n_elements() { let p = array![f64::NAN, 1.]; let q = array![2., 1., 5.]; assert!(q.cross_entropy(&p).is_err()); assert!(p.cross_entropy(&q).is_err()); assert!(q.kl_divergence(&p).is_err()); assert!(p.kl_divergence(&q).is_err()); } #[test] fn 
test_cross_entropy_and_kl_with_different_shape_but_same_n_elements() {
        // p: 3x2, 6 elements
        let p = array![[f64::NAN, 1.], [6., 7.], [10., 20.]];
        // q: 2x3, 6 elements
        let q = array![[2., 1., 5.], [1., 1., 7.],];
        assert!(q.cross_entropy(&p).is_err());
        assert!(p.cross_entropy(&q).is_err());
        assert!(q.kl_divergence(&p).is_err());
        assert!(p.kl_divergence(&q).is_err());
    }

    #[test]
    fn test_cross_entropy_and_kl_with_empty_array_of_floats() {
        let p: Array1<f64> = array![];
        let q: Array1<f64> = array![];
        assert!(p.cross_entropy(&q).unwrap_err().is_empty_input());
        assert!(p.kl_divergence(&q).unwrap_err().is_empty_input());
    }

    #[test]
    fn test_cross_entropy_and_kl_with_negative_qs() -> Result<(), MultiInputError> {
        let p = array![1.];
        let q = array![-1.];
        let cross_entropy: f64 = p.cross_entropy(&q)?;
        let kl_divergence: f64 = p.kl_divergence(&q)?;
        assert!(cross_entropy.is_nan());
        assert!(kl_divergence.is_nan());
        Ok(())
    }

    #[test]
    #[should_panic]
    fn test_cross_entropy_with_noisy_negative_qs() {
        let p = array![n64(1.)];
        let q = array![n64(-1.)];
        let _ = p.cross_entropy(&q);
    }

    #[test]
    #[should_panic]
    fn test_kl_with_noisy_negative_qs() {
        let p = array![n64(1.)];
        let q = array![n64(-1.)];
        let _ = p.kl_divergence(&q);
    }

    #[test]
    fn test_cross_entropy_and_kl_with_zeroes_p() -> Result<(), MultiInputError> {
        let p = array![0., 0.];
        let q = array![0., 0.5];
        assert_eq!(p.cross_entropy(&q)?, 0.);
        assert_eq!(p.kl_divergence(&q)?, 0.);
        Ok(())
    }

    #[test]
    fn test_cross_entropy_and_kl_with_zeroes_q_and_different_data_ownership(
    ) -> Result<(), MultiInputError> {
        let p = array![0.5, 0.5];
        let mut q = array![0.5, 0.];
        assert_eq!(p.cross_entropy(&q.view_mut())?, f64::INFINITY);
        assert_eq!(p.kl_divergence(&q.view_mut())?, f64::INFINITY);
        Ok(())
    }

    #[test]
    fn test_cross_entropy() -> Result<(), MultiInputError> {
        // Arrays of probability values - normalized and positive.
        let p: Array1<f64> = array![
            0.05340169, 0.02508511, 0.03460454, 0.00352313, 0.07837615, 0.05859495, 0.05782189,
            0.0471258, 0.05594036, 0.01630048, 0.07085162, 0.05365855, 0.01959158, 0.05020174,
            0.03801479, 0.00092234, 0.08515856, 0.00580683, 0.0156542, 0.0860375, 0.0724246,
            0.00727477, 0.01004402, 0.01854399, 0.03504082,
        ];
        let q: Array1<f64> = array![
            0.06622616, 0.0478948, 0.03227816, 0.06460884, 0.05795974, 0.01377489, 0.05604812,
            0.01202684, 0.01647579, 0.03392697, 0.01656126, 0.00867528, 0.0625685, 0.07381292,
            0.05489067, 0.01385491, 0.03639174, 0.00511611, 0.05700415, 0.05183825, 0.06703064,
            0.01813342, 0.0007763, 0.0735472, 0.05857833,
        ];
        // Computed using scipy.stats.entropy(p) + scipy.stats.entropy(p, q)
        let expected_cross_entropy = 3.385347705020779;

        assert_abs_diff_eq!(p.cross_entropy(&q)?, expected_cross_entropy, epsilon = 1e-6);
        Ok(())
    }

    #[test]
    fn test_kl() -> Result<(), MultiInputError> {
        // Arrays of probability values - normalized and positive.
        let p: Array1<f64> = array![
            0.00150472, 0.01388706, 0.03495376, 0.03264211, 0.03067355, 0.02183501, 0.00137516,
            0.02213802, 0.02745017, 0.02163975, 0.0324602, 0.03622766, 0.00782343, 0.00222498,
            0.03028156, 0.02346124, 0.00071105, 0.00794496, 0.0127609, 0.02899124, 0.01281487,
            0.0230803, 0.01531864, 0.00518158, 0.02233383, 0.0220279, 0.03196097, 0.03710063,
            0.01817856, 0.03524661, 0.02902393, 0.00853364, 0.01255615, 0.03556958, 0.00400151,
            0.01335932, 0.01864965, 0.02371322, 0.02026543, 0.0035375, 0.01988341, 0.02621831,
            0.03564644, 0.01389121, 0.03151622, 0.03195532, 0.00717521, 0.03547256, 0.00371394,
            0.01108706,
        ];
        let q: Array1<f64> = array![
            0.02038386, 0.03143914, 0.02630206, 0.0171595, 0.0067072, 0.00911324, 0.02635717,
            0.01269113, 0.0302361, 0.02243133, 0.01902902, 0.01297185, 0.02118908, 0.03309548,
            0.01266687, 0.0184529, 0.01830936, 0.03430437, 0.02898924, 0.02238251, 0.0139771,
            0.01879774, 0.02396583, 0.03019978, 0.01421278, 0.02078981, 0.03542451, 0.02887438,
            0.01261783, 0.01014241, 0.03263407, 0.0095969,
0.01923903, 0.0051315, 0.00924686,
            0.00148845, 0.00341391, 0.01480373, 0.01920798, 0.03519871, 0.03315135, 0.02099325,
            0.03251755, 0.00337555, 0.03432165, 0.01763753, 0.02038337, 0.01923023, 0.01438769,
            0.02082707,
        ];
        // Computed using scipy.stats.entropy(p, q)
        let expected_kl = 0.3555862567800096;

        assert_abs_diff_eq!(p.kl_divergence(&q)?, expected_kl, epsilon = 1e-6);
        Ok(())
    }
}

================================================
FILE: src/errors.rs
================================================
//! Custom errors returned from our methods and functions.
use noisy_float::types::N64;
use std::error::Error;
use std::fmt;

/// An error that indicates that the input array was empty.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct EmptyInput;

impl fmt::Display for EmptyInput {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "Empty input.")
    }
}

impl Error for EmptyInput {}

/// An error computing a minimum/maximum value.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum MinMaxError {
    /// The input was empty.
    EmptyInput,
    /// The ordering between a tested pair of values was undefined.
    UndefinedOrder,
}

impl fmt::Display for MinMaxError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            MinMaxError::EmptyInput => write!(f, "Empty input."),
            MinMaxError::UndefinedOrder => {
                write!(f, "Undefined ordering between a tested pair of values.")
            }
        }
    }
}

impl Error for MinMaxError {}

impl From<EmptyInput> for MinMaxError {
    fn from(_: EmptyInput) -> MinMaxError {
        MinMaxError::EmptyInput
    }
}

/// An error used by methods and functions that take two arrays as argument and
/// expect them to have exactly the same shape
/// (e.g. `ShapeMismatch` is raised when `a.shape() == b.shape()` evaluates to `false`).
#[derive(Clone, Debug, PartialEq)]
pub struct ShapeMismatch {
    /// Shape of the first array that was compared.
    pub first_shape: Vec<usize>,
    /// Shape of the second array that was compared.
    pub second_shape: Vec<usize>,
}

impl fmt::Display for ShapeMismatch {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "Array shapes do not match: {:?} and {:?}.",
            self.first_shape, self.second_shape
        )
    }
}

impl Error for ShapeMismatch {}

/// An error for methods that take multiple non-empty array inputs.
#[derive(Clone, Debug, PartialEq)]
pub enum MultiInputError {
    /// One or more of the arrays were empty.
    EmptyInput,
    /// The arrays did not have the same shape.
    ShapeMismatch(ShapeMismatch),
}

impl MultiInputError {
    /// Returns whether `self` is the `EmptyInput` variant.
    pub fn is_empty_input(&self) -> bool {
        matches!(self, MultiInputError::EmptyInput)
    }

    /// Returns whether `self` is the `ShapeMismatch` variant.
    pub fn is_shape_mismatch(&self) -> bool {
        matches!(self, MultiInputError::ShapeMismatch(_))
    }
}

impl fmt::Display for MultiInputError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            MultiInputError::EmptyInput => write!(f, "Empty input."),
            MultiInputError::ShapeMismatch(e) => write!(f, "Shape mismatch: {}", e),
        }
    }
}

impl Error for MultiInputError {}

impl From<EmptyInput> for MultiInputError {
    fn from(_: EmptyInput) -> Self {
        MultiInputError::EmptyInput
    }
}

impl From<ShapeMismatch> for MultiInputError {
    fn from(err: ShapeMismatch) -> Self {
        MultiInputError::ShapeMismatch(err)
    }
}

/// An error computing a quantile.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum QuantileError {
    /// The input was empty.
    EmptyInput,
    /// The `q` was not between `0.` and `1.` (inclusive).
    InvalidQuantile(N64),
}

impl fmt::Display for QuantileError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            QuantileError::EmptyInput => write!(f, "Empty input."),
            QuantileError::InvalidQuantile(q) => {
                write!(f, "{:} is not between 0. and 1. (inclusive).", q)
            }
        }
    }
}

impl Error for QuantileError {}

impl From<EmptyInput> for QuantileError {
    fn from(_: EmptyInput) -> QuantileError {
        QuantileError::EmptyInput
    }
}

================================================
FILE: src/histogram/bins.rs
================================================
#![warn(missing_docs, clippy::all, clippy::pedantic)]

use ndarray::prelude::*;
use std::ops::{Index, Range};

/// A sorted collection of type `A` elements used to represent the boundaries of intervals, i.e.
/// [`Bins`] on a 1-dimensional axis.
///
/// **Note** that all intervals are left-closed and right-open. See examples below.
///
/// # Examples
///
/// ```
/// use ndarray_stats::histogram::{Bins, Edges};
/// use noisy_float::types::n64;
///
/// let unit_edges = Edges::from(vec![n64(0.), n64(1.)]);
/// let unit_interval = Bins::new(unit_edges);
/// // left-closed
/// assert_eq!(
///     unit_interval.range_of(&n64(0.)).unwrap(),
///     n64(0.)..n64(1.),
/// );
/// // right-open
/// assert_eq!(
///     unit_interval.range_of(&n64(1.)),
///     None
/// );
/// ```
///
/// [`Bins`]: struct.Bins.html
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Edges<A: Ord> {
    edges: Vec<A>,
}

impl<A: Ord> From<Vec<A>> for Edges<A> {
    /// Converts a `Vec<A>` into an `Edges<A>`, consuming the edges.
    /// The vector will be sorted in increasing order using an unstable sorting algorithm, with
    /// duplicates removed.
    ///
    /// # Current implementation
    ///
    /// The current sorting algorithm is the same as [`std::slice::sort_unstable()`][sort],
    /// which is based on [pattern-defeating quicksort][pdqsort].
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place (i.e., does not allocate)
    /// , and O(n log n) worst-case.
    ///
    /// # Examples
    ///
    /// ```
    /// use ndarray_stats::histogram::Edges;
    ///
    /// let edges = Edges::from(vec![1, 15, 10, 10, 20]);
    /// // The vec gets sorted!
/// assert_eq!( /// edges[2], /// 15 /// ); /// ``` /// /// [sort]: https://doc.rust-lang.org/stable/std/primitive.slice.html#method.sort_unstable /// [pdqsort]: https://github.com/orlp/pdqsort fn from(mut edges: Vec) -> Self { // sort the array in-place edges.sort_unstable(); // remove duplicates edges.dedup(); Edges { edges } } } impl From> for Edges { /// Converts an `Array1` into an `Edges`, consuming the 1-dimensional array. /// The array will be sorted in increasing order using an unstable sorting algorithm, with /// duplicates removed. /// /// # Current implementation /// /// The current sorting algorithm is the same as [`std::slice::sort_unstable()`][sort], /// which is based on [pattern-defeating quicksort][pdqsort]. /// /// This sort is unstable (i.e., may reorder equal elements), in-place (i.e., does not allocate) /// , and O(n log n) worst-case. /// /// # Examples /// /// ``` /// use ndarray_stats::histogram::Edges; /// /// let edges = Edges::from(vec![1, 15, 10, 20]); /// // The vec gets sorted! /// assert_eq!( /// edges[1], /// 10 /// ); /// ``` /// /// [sort]: https://doc.rust-lang.org/stable/std/primitive.slice.html#method.sort_unstable /// [pdqsort]: https://github.com/orlp/pdqsort fn from(edges: Array1) -> Self { let edges = edges.to_vec(); Self::from(edges) } } impl Index for Edges { type Output = A; /// Returns a reference to the `i`-th edge in `self`. /// /// # Panics /// /// Panics if the index `i` is out of bounds. /// /// # Examples /// /// ``` /// use ndarray_stats::histogram::Edges; /// /// let edges = Edges::from(vec![1, 5, 10, 20]); /// assert_eq!( /// edges[1], /// 5 /// ); /// ``` fn index(&self, i: usize) -> &Self::Output { &self.edges[i] } } impl Edges { /// Returns the number of edges in `self`. 
/// /// # Examples /// /// ``` /// use ndarray_stats::histogram::Edges; /// use noisy_float::types::n64; /// /// let edges = Edges::from(vec![n64(0.), n64(1.), n64(3.)]); /// assert_eq!( /// edges.len(), /// 3 /// ); /// ``` #[must_use] pub fn len(&self) -> usize { self.edges.len() } /// Returns `true` if `self` contains no edges. /// /// # Examples /// /// ``` /// use ndarray_stats::histogram::Edges; /// use noisy_float::types::{N64, n64}; /// /// let edges = Edges::<N64>::from(vec![]); /// assert_eq!(edges.is_empty(), true); /// /// let edges = Edges::from(vec![n64(0.), n64(2.), n64(5.)]); /// assert_eq!(edges.is_empty(), false); /// ``` #[must_use] pub fn is_empty(&self) -> bool { self.edges.is_empty() } /// Returns an immutable 1-dimensional array view of edges. /// /// # Examples /// /// ``` /// use ndarray::array; /// use ndarray_stats::histogram::Edges; /// /// let edges = Edges::from(vec![0, 5, 3]); /// assert_eq!( /// edges.as_array_view(), /// array![0, 3, 5].view() /// ); /// ``` #[must_use] pub fn as_array_view(&self) -> ArrayView1<'_, A> { ArrayView1::from(&self.edges) } /// Returns indices of two consecutive `edges` in `self`, if the interval they represent /// contains the given `value`, or returns `None` otherwise. /// /// That is to say, it returns /// - `Some((left, right))`, where `left` and `right` are the indices of two consecutive edges /// in `self` and `right == left + 1`, if `self[left] <= value < self[right]`; /// - `None`, otherwise. 
/// /// # Examples /// /// ``` /// use ndarray_stats::histogram::Edges; /// /// let edges = Edges::from(vec![0, 2, 3]); /// // `1` is in the interval [0, 2), whose indices are (0, 1) /// assert_eq!( /// edges.indices_of(&1), /// Some((0, 1)) /// ); /// // `5` is not in any of intervals /// assert_eq!( /// edges.indices_of(&5), /// None /// ); /// ``` pub fn indices_of(&self, value: &A) -> Option<(usize, usize)> { // binary search for the correct bin let n_edges = self.len(); match self.edges.binary_search(value) { Ok(i) if i == n_edges - 1 => None, Ok(i) => Some((i, i + 1)), Err(i) => match i { 0 => None, j if j == n_edges => None, j => Some((j - 1, j)), }, } } /// Returns an iterator over the `edges` in `self`. pub fn iter(&self) -> impl Iterator { self.edges.iter() } } /// A sorted collection of non-overlapping 1-dimensional intervals. /// /// **Note** that all intervals are left-closed and right-open. /// /// # Examples /// /// ``` /// use ndarray_stats::histogram::{Edges, Bins}; /// use noisy_float::types::n64; /// /// let edges = Edges::from(vec![n64(0.), n64(1.), n64(2.)]); /// let bins = Bins::new(edges); /// // first bin /// assert_eq!( /// bins.index(0), /// n64(0.)..n64(1.) // n64(1.) is not included in the bin! /// ); /// // second bin /// assert_eq!( /// bins.index(1), /// n64(1.)..n64(2.) /// ); /// ``` #[derive(Clone, Debug, Eq, PartialEq)] pub struct Bins { edges: Edges, } impl Bins { /// Returns a `Bins` instance where each bin corresponds to two consecutive members of the given /// [`Edges`], consuming the edges. /// /// [`Edges`]: struct.Edges.html #[must_use] pub fn new(edges: Edges) -> Self { Bins { edges } } /// Returns the number of bins in `self`. 
/// /// # Examples /// /// ``` /// use ndarray_stats::histogram::{Edges, Bins}; /// use noisy_float::types::n64; /// /// let edges = Edges::from(vec![n64(0.), n64(1.), n64(2.)]); /// let bins = Bins::new(edges); /// assert_eq!( /// bins.len(), /// 2 /// ); /// ``` #[must_use] pub fn len(&self) -> usize { match self.edges.len() { 0 => 0, n => n - 1, } } /// Returns `true` if the number of bins is zero, i.e. if the number of edges is 0 or 1. /// /// # Examples /// /// ``` /// use ndarray_stats::histogram::{Edges, Bins}; /// use noisy_float::types::{N64, n64}; /// /// // At least 2 edges are needed to represent 1 interval /// let edges = Edges::from(vec![n64(0.), n64(1.), n64(3.)]); /// let bins = Bins::new(edges); /// assert_eq!(bins.is_empty(), false); /// /// // No valid interval == Empty /// let edges = Edges::<N64>::from(vec![]); /// let bins = Bins::new(edges); /// assert_eq!(bins.is_empty(), true); /// let edges = Edges::from(vec![n64(0.)]); /// let bins = Bins::new(edges); /// assert_eq!(bins.is_empty(), true); /// ``` #[must_use] pub fn is_empty(&self) -> bool { self.len() == 0 } /// Returns the index of the bin in `self` that contains the given `value`, /// or returns `None` if `value` does not belong to any bins in `self`. 
/// /// # Examples /// /// Basic usage: /// /// ``` /// use ndarray_stats::histogram::{Edges, Bins}; /// /// let edges = Edges::from(vec![0, 2, 4, 6]); /// let bins = Bins::new(edges); /// let value = 1; /// // The first bin [0, 2) contains `1` /// assert_eq!( /// bins.index_of(&1), /// Some(0) /// ); /// // No bin contains 100 /// assert_eq!( /// bins.index_of(&100), /// None /// ) /// ``` /// /// Chaining [`Bins::index`] and [`Bins::index_of`] to get the boundaries of the bin containing /// the value: /// /// ``` /// # use ndarray_stats::histogram::{Edges, Bins}; /// # let edges = Edges::from(vec![0, 2, 4, 6]); /// # let bins = Bins::new(edges); /// # let value = 1; /// assert_eq!( /// // using `Option::map` to avoid panic on index out-of-bounds /// bins.index_of(&1).map(|i| bins.index(i)), /// Some(0..2) /// ); /// ``` pub fn index_of(&self, value: &A) -> Option { self.edges.indices_of(value).map(|t| t.0) } /// Returns a range as the bin which contains the given `value`, or returns `None` otherwise. /// /// # Examples /// /// ``` /// use ndarray_stats::histogram::{Edges, Bins}; /// /// let edges = Edges::from(vec![0, 2, 4, 6]); /// let bins = Bins::new(edges); /// // [0, 2) contains `1` /// assert_eq!( /// bins.range_of(&1), /// Some(0..2) /// ); /// // `10` is not in any interval /// assert_eq!( /// bins.range_of(&10), /// None /// ); /// ``` pub fn range_of(&self, value: &A) -> Option> where A: Clone, { let edges_indexes = self.edges.indices_of(value); edges_indexes.map(|(left, right)| Range { start: self.edges[left].clone(), end: self.edges[right].clone(), }) } /// Returns a range as the bin at the given `index` position. /// /// # Panics /// /// Panics if `index` is out of bounds. 
/// /// # Examples /// /// ``` /// use ndarray_stats::histogram::{Edges, Bins}; /// /// let edges = Edges::from(vec![1, 5, 10, 20]); /// let bins = Bins::new(edges); /// assert_eq!( /// bins.index(1), /// 5..10 /// ); /// ``` #[must_use] pub fn index(&self, index: usize) -> Range where A: Clone, { // It was not possible to implement this functionality // using the `Index` trait unless we were willing to // allocate a `Vec>` in the struct. // Index, in fact, forces you to return a reference. Range { start: self.edges[index].clone(), end: self.edges[index + 1].clone(), } } } #[cfg(test)] mod edges_tests { use super::{Array1, Edges}; use quickcheck_macros::quickcheck; use std::collections::BTreeSet; use std::iter::FromIterator; #[quickcheck] fn check_sorted_from_vec(v: Vec) -> bool { let edges = Edges::from(v); let n = edges.len(); for i in 1..n { if edges[i - 1] > edges[i] { return false; } } true } #[quickcheck] fn check_sorted_from_array(v: Vec) -> bool { let a = Array1::from(v); let edges = Edges::from(a); let n = edges.len(); for i in 1..n { if edges[i - 1] > edges[i] { return false; } } true } #[quickcheck] fn edges_are_right_open(v: Vec) -> bool { let edges = Edges::from(v); let view = edges.as_array_view(); if view.is_empty() { true } else { let last = view[view.len() - 1]; edges.indices_of(&last).is_none() } } #[quickcheck] fn edges_are_left_closed(v: Vec) -> bool { let edges = Edges::from(v); if let 1 = edges.len() { true } else { let view = edges.as_array_view(); if view.is_empty() { true } else { let first = view[0]; edges.indices_of(&first).is_some() } } } #[quickcheck] #[allow(clippy::needless_pass_by_value)] fn edges_are_deduped(v: Vec) -> bool { let unique_elements = BTreeSet::from_iter(v.iter()); let edges = Edges::from(v.clone()); let view = edges.as_array_view(); let unique_edges = BTreeSet::from_iter(view.iter()); unique_edges == unique_elements } } #[cfg(test)] mod bins_tests { use super::{Bins, Edges}; #[test] #[should_panic] 
#[allow(unused_must_use)] fn get_panics_for_out_of_bounds_indexes() { let edges = Edges::from(vec![0]); let bins = Bins::new(edges); // we need at least two edges to make a valid bin! bins.index(0); } } ================================================ FILE: src/histogram/errors.rs ================================================ use crate::errors::{EmptyInput, MinMaxError}; use std::error; use std::fmt; /// Error to denote that no bin has been found for a certain observation. #[derive(Debug, Clone)] pub struct BinNotFound; impl fmt::Display for BinNotFound { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "No bin has been found.") } } impl error::Error for BinNotFound { fn description(&self) -> &str { "No bin has been found." } } /// Error computing the set of histogram bins. #[derive(Debug, Clone)] pub enum BinsBuildError { /// The input array was empty. EmptyInput, /// The strategy for computing appropriate bins failed. Strategy, #[doc(hidden)] __NonExhaustive, } impl BinsBuildError { /// Returns whether `self` is the `EmptyInput` variant. pub fn is_empty_input(&self) -> bool { match self { BinsBuildError::EmptyInput => true, _ => false, } } /// Returns whether `self` is the `Strategy` variant. pub fn is_strategy(&self) -> bool { match self { BinsBuildError::Strategy => true, _ => false, } } } impl fmt::Display for BinsBuildError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "The strategy failed to determine a non-zero bin width.") } } impl error::Error for BinsBuildError { fn description(&self) -> &str { "The strategy failed to determine a non-zero bin width." 
} } impl From for BinsBuildError { fn from(_: EmptyInput) -> Self { BinsBuildError::EmptyInput } } impl From for BinsBuildError { fn from(err: MinMaxError) -> BinsBuildError { match err { MinMaxError::EmptyInput => BinsBuildError::EmptyInput, MinMaxError::UndefinedOrder => BinsBuildError::Strategy, } } } ================================================ FILE: src/histogram/grid.rs ================================================ #![warn(missing_docs, clippy::all, clippy::pedantic)] use super::{bins::Bins, errors::BinsBuildError, strategies::BinsBuildingStrategy}; use itertools::izip; use ndarray::{ArrayRef, Axis, Ix1, Ix2}; use std::ops::Range; /// An orthogonal partition of a rectangular region in an *n*-dimensional space, e.g. /// [*a*<sub>0</sub>, *b*<sub>0</sub>) × ⋯ × [*a*<sub>*n*−1</sub>, *b*<sub>*n*−1</sub>), /// represented as a collection of rectangular *n*-dimensional bins. /// /// The grid is **solely determined by the Cartesian product of its projections** on each coordinate /// axis. Therefore, each element in the product set should correspond to a sub-region in the grid. 
/// /// For example, this partition can be represented as a `Grid` struct: /// /// ```text /// /// g +---+-------+---+ /// | 3 | 4 | 5 | /// f +---+-------+---+ /// | | | | /// | 0 | 1 | 2 | /// | | | | /// e +---+-------+---+ /// a b c d /// /// R0: [a, b) × [e, f) /// R1: [b, c) × [e, f) /// R2: [c, d) × [e, f) /// R3: [a, b) × [f, g) /// R4: [b, c) × [f, g) /// R5: [c, d) × [f, g) /// Grid: { [a, b), [b, c), [c, d) } × { [e, f), [f, g) } == { R0, R1, R2, R3, R4, R5 } /// ``` /// /// while the next one can't: /// /// ```text /// g +---+-----+---+ /// | | 2 | 3 | /// (f) | +-----+---+ /// | 0 | | /// | | 1 | /// | | | /// e +---+-----+---+ /// a b c d /// /// R0: [a, b) × [e, g) /// R1: [b, d) × [e, f) /// R2: [b, c) × [f, g) /// R3: [c, d) × [f, g) /// // 'f', as long as 'R1', 'R2', or 'R3', doesn't appear on LHS /// // [b, c) × [e, g), [c, d) × [e, g) doesn't appear on RHS /// Grid: { [a, b), [b, c), [c, d) } × { [e, g) } != { R0, R1, R2, R3 } /// ``` /// /// # Examples /// /// Basic usage, building a `Grid` via [`GridBuilder`], with optimal grid layout determined by /// a given [`strategy`], and generating a [`histogram`]: /// /// ``` /// use ndarray::{Array, array}; /// use ndarray_stats::{ /// histogram::{strategies::Auto, Bins, Edges, Grid, GridBuilder}, /// HistogramExt, /// }; /// /// // 1-dimensional observations, as a (n_observations, n_dimension) 2-d matrix /// let observations = Array::from_shape_vec( /// (12, 1), /// vec![1, 4, 5, 2, 100, 20, 50, 65, 27, 40, 45, 23], /// ).unwrap(); /// /// // The optimal grid layout is inferred from the data, given a chosen strategy, Auto in this case /// let grid = GridBuilder::>::from_array(&observations).unwrap().build(); /// /// let histogram = observations.histogram(grid); /// /// let histogram_matrix = histogram.counts(); /// // Bins are left-closed, right-open! 
/// let expected = array![4, 3, 3, 1, 0, 1]; /// assert_eq!(histogram_matrix, expected.into_dyn()); /// ``` /// /// [`histogram`]: trait.HistogramExt.html /// [`GridBuilder`]: struct.GridBuilder.html /// [`strategy`]: strategies/index.html #[derive(Clone, Debug, Eq, PartialEq)] pub struct Grid { projections: Vec>, } impl From>> for Grid { /// Converts a `Vec>` into a `Grid`, consuming the vector of bins. /// /// The `i`-th element in `Vec>` represents the projection of the bin grid onto the /// `i`-th axis. /// /// Alternatively, a `Grid` can be built directly from data using a [`GridBuilder`]. /// /// [`GridBuilder`]: struct.GridBuilder.html fn from(projections: Vec>) -> Self { Grid { projections } } } impl Grid { /// Returns the number of dimensions of the region partitioned by the grid. /// /// # Examples /// /// ``` /// use ndarray_stats::histogram::{Edges, Bins, Grid}; /// /// let edges = Edges::from(vec![0, 1]); /// let bins = Bins::new(edges); /// let square_grid = Grid::from(vec![bins.clone(), bins.clone()]); /// /// assert_eq!(square_grid.ndim(), 2usize) /// ``` #[must_use] pub fn ndim(&self) -> usize { self.projections.len() } /// Returns the numbers of bins along each coordinate axis. /// /// # Examples /// /// ``` /// use ndarray_stats::histogram::{Edges, Bins, Grid}; /// /// let edges_x = Edges::from(vec![0, 1]); /// let edges_y = Edges::from(vec![-1, 0, 1]); /// let bins_x = Bins::new(edges_x); /// let bins_y = Bins::new(edges_y); /// let square_grid = Grid::from(vec![bins_x, bins_y]); /// /// assert_eq!(square_grid.shape(), vec![1usize, 2usize]); /// ``` #[must_use] pub fn shape(&self) -> Vec { self.projections.iter().map(Bins::len).collect() } /// Returns the grid projections on each coordinate axis as a slice of immutable references. #[must_use] pub fn projections(&self) -> &[Bins] { &self.projections } /// Returns an `n-dimensional` index, of bins along each axis that contains the point, if one /// exists. 
/// /// Returns `None` if the point is outside the grid. /// /// # Panics /// /// Panics if dimensionality of the point doesn't equal the grid's. /// /// # Examples /// /// Basic usage: /// /// ``` /// use ndarray::array; /// use ndarray_stats::histogram::{Edges, Bins, Grid}; /// use noisy_float::types::n64; /// /// let edges = Edges::from(vec![n64(-1.), n64(0.), n64(1.)]); /// let bins = Bins::new(edges); /// let square_grid = Grid::from(vec![bins.clone(), bins.clone()]); /// /// // (0., -0.7) falls in 1st and 0th bin respectively /// assert_eq!( /// square_grid.index_of(&array![n64(0.), n64(-0.7)]), /// Some(vec![1, 0]), /// ); /// // Returns `None`, as `1.` is outside the grid since bins are right-open /// assert_eq!( /// square_grid.index_of(&array![n64(0.), n64(1.)]), /// None, /// ); /// ``` /// /// A panic upon dimensionality mismatch: /// /// ```should_panic /// # use ndarray::array; /// # use ndarray_stats::histogram::{Edges, Bins, Grid}; /// # use noisy_float::types::n64; /// # let edges = Edges::from(vec![n64(-1.), n64(0.), n64(1.)]); /// # let bins = Bins::new(edges); /// # let square_grid = Grid::from(vec![bins.clone(), bins.clone()]); /// // the point has 3 dimensions, the grid expected 2 dimensions /// assert_eq!( /// square_grid.index_of(&array![n64(0.), n64(-0.7), n64(0.5)]), /// Some(vec![1, 0, 1]), /// ); /// ``` pub fn index_of(&self, point: &ArrayRef) -> Option> { assert_eq!( point.len(), self.ndim(), "Dimension mismatch: the point has {:?} dimensions, the grid \ expected {:?} dimensions.", point.len(), self.ndim() ); point .iter() .zip(self.projections.iter()) .map(|(v, e)| e.index_of(v)) .collect() } } impl Grid { /// Given an `n`-dimensional index, `i = (i_0, ..., i_{n-1})`, returns an `n`-dimensional bin, /// `I_{i_0} x ... x I_{i_{n-1}}`, where `I_{i_j}` is the `i_j`-th interval on the `j`-th /// projection of the grid on the coordinate axes. 
/// /// # Panics /// /// Panics if the length of `index` differs from the number of dimensions of the grid, or if at /// least one component of the index, `(i_0, ..., i_{n-1})`, is out of bounds on the /// corresponding coordinate axis, i.e. if there exists `j` s.t. /// `i_j >= self.projections[j].len()`. /// /// # Examples /// /// Basic usage: /// /// ``` /// use ndarray::array; /// use ndarray_stats::histogram::{Edges, Bins, Grid}; /// /// let edges_x = Edges::from(vec![0, 1]); /// let edges_y = Edges::from(vec![2, 3, 4]); /// let bins_x = Bins::new(edges_x); /// let bins_y = Bins::new(edges_y); /// let square_grid = Grid::from(vec![bins_x, bins_y]); /// /// // Query the 0-th bin on x-axis, and 1-st bin on y-axis /// assert_eq!( /// square_grid.index(&[0, 1]), /// vec![0..1, 3..4], /// ); /// ``` /// /// A panic upon out-of-bounds: /// /// ```should_panic /// # use ndarray::array; /// # use ndarray_stats::histogram::{Edges, Bins, Grid}; /// # let edges_x = Edges::from(vec![0, 1]); /// # let edges_y = Edges::from(vec![2, 3, 4]); /// # let bins_x = Bins::new(edges_x); /// # let bins_y = Bins::new(edges_y); /// # let square_grid = Grid::from(vec![bins_x, bins_y]); /// // out-of-bound on y-axis /// assert_eq!( /// square_grid.index(&[0, 2]), /// vec![0..1, 3..4], /// ); /// ``` #[must_use] pub fn index(&self, index: &[usize]) -> Vec> { assert_eq!( index.len(), self.ndim(), "Dimension mismatch: the index has {0:?} dimensions, the grid \ expected {1:?} dimensions.", index.len(), self.ndim() ); izip!(&self.projections, index) .map(|(bins, &i)| bins.index(i)) .collect() } } /// A builder used to create [`Grid`] instances for [`histogram`] computations. 
/// /// # Examples /// /// Basic usage, creating a `Grid` with some observations and a given [`strategy`]: /// /// ``` /// use ndarray::Array; /// use ndarray_stats::histogram::{strategies::Auto, Bins, Edges, Grid, GridBuilder}; /// /// // 1-dimensional observations, as a (n_observations, n_dimension) 2-d matrix /// let observations = Array::from_shape_vec( /// (12, 1), /// vec![1, 4, 5, 2, 100, 20, 50, 65, 27, 40, 45, 23], /// ).unwrap(); /// /// // The optimal grid layout is inferred from the data, given a chosen strategy, Auto in this case /// let grid = GridBuilder::>::from_array(&observations).unwrap().build(); /// // Equivalently, build a Grid directly /// let expected_grid = Grid::from(vec![Bins::new(Edges::from(vec![1, 20, 39, 58, 77, 96, 115]))]); /// /// assert_eq!(grid, expected_grid); /// ``` /// /// [`Grid`]: struct.Grid.html /// [`histogram`]: trait.HistogramExt.html /// [`strategy`]: strategies/index.html #[allow(clippy::module_name_repetitions)] pub struct GridBuilder { bin_builders: Vec, } impl GridBuilder where A: Ord, B: BinsBuildingStrategy, { /// Returns a `GridBuilder` for building a [`Grid`] with a given [`strategy`] and some /// observations in a 2-dimensional array with shape `(n_observations, n_dimension)`. /// /// # Errors /// /// It returns [`BinsBuildError`] if it is not possible to build a [`Grid`] given /// the observed data according to the chosen [`strategy`]. /// /// # Examples /// /// See [struct-level examples] for basic usage. 
/// /// [`Grid`]: struct.Grid.html /// [`strategy`]: strategies/index.html /// [`BinsBuildError`]: errors/enum.BinsBuildError.html /// [struct-level examples]: struct.GridBuilder.html#examples pub fn from_array(array: &ArrayRef) -> Result { let bin_builders = array .axis_iter(Axis(1)) .map(|data| B::from_array(&data)) .collect::, BinsBuildError>>()?; Ok(Self { bin_builders }) } /// Returns a [`Grid`] instance, with building parameters inferred in [`from_array`], according /// to the specified [`strategy`] and observations provided. /// /// # Examples /// /// See [struct-level examples] for basic usage. /// /// [`Grid`]: struct.Grid.html /// [`strategy`]: strategies/index.html /// [`from_array`]: #method.from_array /// [struct-level examples]: struct.GridBuilder.html#examples #[must_use] pub fn build(&self) -> Grid { let projections: Vec<_> = self.bin_builders.iter().map(|b| b.build()).collect(); Grid::from(projections) } } ================================================ FILE: src/histogram/histograms.rs ================================================ use super::errors::BinNotFound; use super::grid::Grid; use ndarray::prelude::*; /// Histogram data structure. pub struct Histogram { counts: ArrayD, grid: Grid, } impl Histogram { /// Returns a new instance of Histogram given a [`Grid`]. /// /// [`Grid`]: struct.Grid.html pub fn new(grid: Grid) -> Self { let counts = ArrayD::zeros(grid.shape()); Histogram { counts, grid } } /// Adds a single observation to the histogram. /// /// **Panics** if dimensions do not match: `self.ndim() != observation.len()`. 
/// /// # Errors /// /// Returns `BinNotFound` if the observation does not fall into any bin of the grid. /// /// # Example: /// ``` /// use ndarray::array; /// use ndarray_stats::histogram::{Edges, Bins, Histogram, Grid}; /// use noisy_float::types::n64; /// /// let edges = Edges::from(vec![n64(-1.), n64(0.), n64(1.)]); /// let bins = Bins::new(edges); /// let square_grid = Grid::from(vec![bins.clone(), bins.clone()]); /// let mut histogram = Histogram::new(square_grid); /// /// let observation = array![n64(0.5), n64(0.6)]; /// /// histogram.add_observation(&observation)?; /// /// let histogram_matrix = histogram.counts(); /// let expected = array![ /// [0, 0], /// [0, 1], /// ]; /// assert_eq!(histogram_matrix, expected.into_dyn()); /// # Ok::<(), Box<dyn std::error::Error>>(()) /// ``` pub fn add_observation(&mut self, observation: &ArrayRef) -> Result<(), BinNotFound> { match self.grid.index_of(observation) { Some(bin_index) => { self.counts[&*bin_index] += 1; Ok(()) } None => Err(BinNotFound), } } /// Returns the number of dimensions of the space the histogram is covering. pub fn ndim(&self) -> usize { debug_assert_eq!(self.counts.ndim(), self.grid.ndim()); self.counts.ndim() } /// Borrows a view on the histogram counts matrix. pub fn counts(&self) -> ArrayViewD<'_, usize> { self.counts.view() } /// Borrows an immutable reference to the histogram grid. pub fn grid(&self) -> &Grid { &self.grid } } /// Extension trait for `ArrayRef` providing methods to compute histograms. pub trait HistogramExt { /// Returns the [histogram](https://en.wikipedia.org/wiki/Histogram) /// for a 2-dimensional array of points `M`. /// /// Let `(n, d)` be the shape of `M`: /// - `n` is the number of points; /// - `d` is the number of dimensions of the space those points belong to. /// It follows that every row in `M` is a `d`-dimensional point. /// /// For example: a (3, 4) matrix `M` is a collection of 3 points in a /// 4-dimensional space. 
/// /// Important: points outside the grid are ignored! /// /// **Panics** if `d` is different from `grid.ndim()`. 
/// /// # Example: /// /// ``` /// use ndarray::array; /// use ndarray_stats::{ /// HistogramExt, /// histogram::{ /// Histogram, Grid, GridBuilder, /// Edges, Bins, /// strategies::Sqrt}, /// }; /// use noisy_float::types::{N64, n64}; /// /// let observations = array![ /// [n64(1.), n64(0.5)], /// [n64(-0.5), n64(1.)], /// [n64(-1.), n64(-0.5)], /// [n64(0.5), n64(-1.)] /// ]; /// let grid = GridBuilder::<Sqrt<N64>>::from_array(&observations).unwrap().build(); /// let expected_grid = Grid::from( /// vec![ /// Bins::new(Edges::from(vec![n64(-1.), n64(0.), n64(1.), n64(2.)])), /// Bins::new(Edges::from(vec![n64(-1.), n64(0.), n64(1.), n64(2.)])), /// ] /// ); /// assert_eq!(grid, expected_grid); /// /// let histogram = observations.histogram(grid); /// /// let histogram_matrix = histogram.counts(); /// // Bins are left inclusive, right exclusive! /// let expected = array![ /// [1, 0, 1], /// [1, 0, 0], /// [0, 1, 0], /// ]; /// assert_eq!(histogram_matrix, expected.into_dyn()); /// ``` fn histogram(&self, grid: Grid) -> Histogram where A: Ord; private_decl! {} } impl HistogramExt for ArrayRef where A: Ord, { fn histogram(&self, grid: Grid) -> Histogram { let mut histogram = Histogram::new(grid); for point in self.axis_iter(Axis(0)) { let _ = histogram.add_observation(&point); } histogram } private_impl! {} } ================================================ FILE: src/histogram/mod.rs ================================================ //! Histogram functionalities. pub use self::bins::{Bins, Edges}; pub use self::grid::{Grid, GridBuilder}; pub use self::histograms::{Histogram, HistogramExt}; mod bins; pub mod errors; mod grid; mod histograms; pub mod strategies; ================================================ FILE: src/histogram/strategies.rs ================================================ //! Strategies used by [`GridBuilder`] to infer optimal parameters from data for building [`Bins`] //! and [`Grid`] instances. //! //! 
The docs for each strategy have been taken almost verbatim from [`NumPy`]. //! //! Each strategy specifies how to compute the optimal number of [`Bins`] or the optimal bin width. //! For those strategies that prescribe the optimal number of [`Bins`], the optimal bin width is //! computed by `bin_width = (max - min)/n`. //! //! Since all bins are left-closed and right-open, it is guaranteed to add an extra bin to include //! the maximum value from the given data when necessary, so that no data is discarded. //! //! # Strategies //! //! Currently, the following strategies are implemented: //! //! - [`Auto`]: Maximum of the [`Sturges`] and [`FreedmanDiaconis`] strategies. Provides good all //! around performance. //! - [`FreedmanDiaconis`]: Robust (resilient to outliers) strategy that takes into account data //! variability and data size. //! - [`Rice`]: A strategy that does not take variability into account, only data size. Commonly //! overestimates number of bins required. //! - [`Sqrt`]: Square root (of data size) strategy, used by Excel and other programs //! for its speed and simplicity. //! - [`Sturges`]: R’s default strategy, only accounts for data size. Only optimal for gaussian data //! and underestimates number of bins for large non-gaussian datasets. //! //! # Notes //! //! In general, successful inference on optimal bin width and number of bins relies on //! **variability** of data. In other words, the provided observations should not be empty or //! constant. //! //! In addition, [`Auto`] and [`FreedmanDiaconis`] require the [`interquartile range (IQR)`][iqr], //! i.e. the difference between upper and lower quartiles, to be positive. //! //! [`GridBuilder`]: ../struct.GridBuilder.html //! [`Bins`]: ../struct.Bins.html //! [`Grid`]: ../struct.Grid.html //! [`NumPy`]: https://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram_bin_edges.html#numpy.histogram_bin_edges //! [`Auto`]: struct.Auto.html //! [`Sturges`]: struct.Sturges.html //! 
[`FreedmanDiaconis`]: struct.FreedmanDiaconis.html //! [`Rice`]: struct.Rice.html //! [`Sqrt`]: struct.Sqrt.html //! [iqr]: https://www.wikiwand.com/en/Interquartile_range #![warn(missing_docs, clippy::all, clippy::pedantic)] use crate::{ histogram::{errors::BinsBuildError, Bins, Edges}, quantile::{interpolate::Nearest, Quantile1dExt, QuantileExt}, }; use ndarray::prelude::*; use noisy_float::types::n64; use num_traits::{FromPrimitive, NumOps, Zero}; /// A trait implemented by all strategies to build [`Bins`] with parameters inferred from /// observations. /// /// This is required by [`GridBuilder`] to know how to build a [`Grid`]'s projections on the /// coordinate axes. /// /// [`Bins`]: ../struct.Bins.html /// [`GridBuilder`]: ../struct.GridBuilder.html /// [`Grid`]: ../struct.Grid.html pub trait BinsBuildingStrategy { #[allow(missing_docs)] type Elem: Ord; /// Returns a strategy that has learnt the required parameter for building [`Bins`] for a given /// 1-dimensional array, or an `Err` if it is not possible to infer the required parameter /// with the given data and specified strategy. /// /// # Errors /// /// See each of the struct-level documentation for details on errors an implementor may return. /// /// [`Bins`]: ../struct.Bins.html fn from_array(array: &ArrayRef) -> Result where Self: std::marker::Sized; /// Returns a [`Bins`] instance, according to parameters inferred from observations. /// /// [`Bins`]: ../struct.Bins.html fn build(&self) -> Bins; /// Returns the optimal number of bins, according to parameters inferred from observations. fn n_bins(&self) -> usize; } #[derive(Debug)] struct EquiSpaced { bin_width: T, min: T, max: T, } /// Square root (of data size) strategy, used by Excel and other programs for its speed and /// simplicity. /// /// Let `n` be the number of observations. 
Then /// /// `n_bins` = `sqrt(n)` /// /// # Notes /// /// This strategy requires the data /// /// - not being empty /// - not being constant #[derive(Debug)] pub struct Sqrt { builder: EquiSpaced, } /// A strategy that does not take variability into account, only data size. Commonly /// overestimates number of bins required. /// /// Let `n` be the number of observations and `n_bins` be the number of bins. /// /// `n_bins` = 2`n`<sup>1/3</sup> /// /// `n_bins` is only proportional to cube root of `n`. It tends to overestimate /// the `n_bins` and it does not take into account data variability. /// /// # Notes /// /// This strategy requires the data /// /// - not being empty /// - not being constant #[derive(Debug)] pub struct Rice { builder: EquiSpaced, } /// R’s default strategy, only accounts for data size. Only optimal for gaussian data and /// underestimates number of bins for large non-gaussian datasets. /// /// Let `n` be the number of observations. /// The number of bins is 1 plus the base 2 log of `n`. This estimator assumes normality of data and /// is too conservative for larger, non-normal datasets. /// /// This is the default method in R’s hist method. /// /// # Notes /// /// This strategy requires the data /// /// - not being empty /// - not being constant #[derive(Debug)] pub struct Sturges { builder: EquiSpaced, } /// Robust (resilient to outliers) strategy that takes into account data variability and data size. /// /// Let `n` be the number of observations. /// /// `bin_width` = 2 × `IQR` × `n`<sup>−1/3</sup> /// /// The bin width is proportional to the interquartile range ([`IQR`]) and inversely proportional to /// cube root of `n`. It can be too conservative for small datasets, but it is quite good for large /// datasets. /// /// The [`IQR`] is very robust to outliers. 
/// /// # Notes /// /// This strategy requires the data /// /// - not being empty /// - not being constant /// - having positive [`IQR`] /// /// [`IQR`]: https://en.wikipedia.org/wiki/Interquartile_range #[derive(Debug)] pub struct FreedmanDiaconis { builder: EquiSpaced, } #[derive(Debug)] enum SturgesOrFD { Sturges(Sturges), FreedmanDiaconis(FreedmanDiaconis), } /// Maximum of the [`Sturges`] and [`FreedmanDiaconis`] strategies. Provides good all around /// performance. /// /// A compromise to get a good value. For small datasets the [`Sturges`] value will usually be /// chosen, while larger datasets will usually default to [`FreedmanDiaconis`]. Avoids the overly /// conservative behaviour of [`FreedmanDiaconis`] and [`Sturges`] for small and large datasets /// respectively. /// /// # Notes /// /// This strategy requires the data /// /// - not being empty /// - not being constant /// - having positive [`IQR`] /// /// [`Sturges`]: struct.Sturges.html /// [`FreedmanDiaconis`]: struct.FreedmanDiaconis.html /// [`IQR`]: https://en.wikipedia.org/wiki/Interquartile_range #[derive(Debug)] pub struct Auto { builder: SturgesOrFD, } impl EquiSpaced where T: Ord + Clone + FromPrimitive + NumOps + Zero, { /// Returns `Err(BinsBuildError::Strategy)` if `bin_width<=0` or `min` >= `max`. /// Returns `Ok(Self)` otherwise. 
fn new(bin_width: T, min: T, max: T) -> Result<Self, BinsBuildError> {
        // A non-positive width or an empty/inverted range cannot describe any bin.
        if (bin_width <= T::zero()) || (min >= max) {
            Err(BinsBuildError::Strategy)
        } else {
            Ok(Self {
                bin_width,
                min,
                max,
            })
        }
    }

    /// Builds the bins whose edges are `min + i * bin_width` for `i` in `0..=n_bins`.
    ///
    /// **Panics** if an edge index `i` cannot be converted to `T`.
    fn build(&self) -> Bins<T> {
        let n_bins = self.n_bins();
        // `0..=n_bins` reports its exact length, so `collect` can size the `Vec` up front.
        let edges: Vec<T> = (0..=n_bins)
            .map(|i| self.min.clone() + T::from_usize(i).unwrap() * self.bin_width.clone())
            .collect();
        Bins::new(Edges::from(edges))
    }

    /// Returns the number of `bin_width` steps needed, starting from `min`, to get strictly
    /// past `max`.
    fn n_bins(&self) -> usize {
        let mut max_edge = self.min.clone();
        let mut n_bins = 0;
        while max_edge <= self.max {
            max_edge = max_edge + self.bin_width.clone();
            n_bins += 1;
        }
        n_bins
    }

    /// Returns a copy of the bin width.
    fn bin_width(&self) -> T {
        self.bin_width.clone()
    }
}

impl<T> BinsBuildingStrategy for Sqrt<T>
where
    T: Ord + Clone + FromPrimitive + NumOps + Zero,
{
    type Elem = T;

    /// Returns `Err(BinsBuildError::Strategy)` if the array is constant, or more generally if
    /// the computed bin width `(max - min) / n_bins` is not strictly positive.
    /// Returns `Err(BinsBuildError::EmptyInput)` if `a.len()==0`.
    /// Returns `Ok(Self)` otherwise.
    fn from_array(a: &ArrayRef<T, Ix1>) -> Result<Self, BinsBuildError> {
        let n_elems = a.len();
        // casting `n_elems: usize` to `f64` may cause an off-by-one error here if
        // `n_elems` > 2 ^ 53, but it's not relevant here
        #[allow(clippy::cast_precision_loss)]
        // casting the rounded square root from `f64` to `usize` is safe
        #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
        let n_bins = (n_elems as f64).sqrt().round() as usize;
        // `min`/`max` fail on empty input, so the division below is never reached with
        // `n_bins == 0`.
        let min = a.min()?.clone();
        let max = a.max()?.clone();
        let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins);
        let builder = EquiSpaced::new(bin_width, min, max)?;
        Ok(Self { builder })
    }

    fn build(&self) -> Bins<T> {
        self.builder.build()
    }

    fn n_bins(&self) -> usize {
        self.builder.n_bins()
    }
}

impl<T> Sqrt<T>
where
    T: Ord + Clone + FromPrimitive + NumOps + Zero,
{
    /// The bin width (or bin length) according to the fitted strategy.
pub fn bin_width(&self) -> T {
        self.builder.bin_width()
    }
}

impl<T> BinsBuildingStrategy for Rice<T>
where
    T: Ord + Clone + FromPrimitive + NumOps + Zero,
{
    type Elem = T;

    /// Returns `Err(BinsBuildError::Strategy)` if the array is constant, or more generally if
    /// the computed bin width `(max - min) / n_bins` is not strictly positive.
    /// Returns `Err(BinsBuildError::EmptyInput)` if `a.len()==0`.
    /// Returns `Ok(Self)` otherwise.
    fn from_array(a: &ArrayRef<T, Ix1>) -> Result<Self, BinsBuildError> {
        let n_elems = a.len();
        // casting `n_elems: usize` to `f64` may cause an off-by-one error here if
        // `n_elems` > 2 ^ 53, but it's not relevant here
        #[allow(clippy::cast_precision_loss)]
        // casting the rounded cube root from `f64` to `usize` is safe
        #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
        let n_bins = (2. * (n_elems as f64).powf(1. / 3.)).round() as usize;
        // `min`/`max` fail on empty input, so the division below is never reached with
        // `n_bins == 0`.
        let min = a.min()?.clone();
        let max = a.max()?.clone();
        let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins);
        let builder = EquiSpaced::new(bin_width, min, max)?;
        Ok(Self { builder })
    }

    fn build(&self) -> Bins<T> {
        self.builder.build()
    }

    fn n_bins(&self) -> usize {
        self.builder.n_bins()
    }
}

impl<T> Rice<T>
where
    T: Ord + Clone + FromPrimitive + NumOps + Zero,
{
    /// The bin width (or bin length) according to the fitted strategy.
    pub fn bin_width(&self) -> T {
        self.builder.bin_width()
    }
}

impl<T> BinsBuildingStrategy for Sturges<T>
where
    T: Ord + Clone + FromPrimitive + NumOps + Zero,
{
    type Elem = T;

    /// Returns `Err(BinsBuildError::Strategy)` if the array is constant.
    /// Returns `Err(BinsBuildError::EmptyInput)` if `a.len()==0`.
    /// Returns `Ok(Self)` otherwise.
fn from_array(a: &ArrayRef) -> Result { let n_elems = a.len(); // casting `n_elems: usize` to `f64` may casus off-by-one error here if `n_elems` > 2 ^ 53, // but it's not relevant here #[allow(clippy::cast_precision_loss)] // casting the rounded base-2 log from `f64` to `usize` is safe #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] let n_bins = (n_elems as f64).log2().round() as usize + 1; let min = a.min()?; let max = a.max()?; let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins); let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?; Ok(Self { builder }) } fn build(&self) -> Bins { self.builder.build() } fn n_bins(&self) -> usize { self.builder.n_bins() } } impl Sturges where T: Ord + Clone + FromPrimitive + NumOps + Zero, { /// The bin width (or bin length) according to the fitted strategy. pub fn bin_width(&self) -> T { self.builder.bin_width() } } impl BinsBuildingStrategy for FreedmanDiaconis where T: Ord + Clone + FromPrimitive + NumOps + Zero, { type Elem = T; /// Returns `Err(BinsBuildError::Strategy)` if `IQR==0`. /// Returns `Err(BinsBuildError::EmptyInput)` if `a.len()==0`. /// Returns `Ok(Self)` otherwise. 
fn from_array(a: &ArrayRef<T, Ix1>) -> Result<Self, BinsBuildError> {
        let n_points = a.len();
        if n_points == 0 {
            return Err(BinsBuildError::EmptyInput);
        }

        // The quantile routine reorders its input, so it works on a private copy.
        let mut a_copy = a.to_owned();
        // The array is non-empty and both probabilities are constants, hence the `unwrap`s.
        let first_quartile = a_copy.quantile_mut(n64(0.25), &Nearest).unwrap();
        let third_quartile = a_copy.quantile_mut(n64(0.75), &Nearest).unwrap();
        let iqr = third_quartile - first_quartile;

        let bin_width = Self::compute_bin_width(n_points, iqr);
        let min = a.min()?.clone();
        let max = a.max()?.clone();
        // A zero `IQR` gives a zero bin width, which `EquiSpaced::new` rejects.
        let builder = EquiSpaced::new(bin_width, min, max)?;
        Ok(Self { builder })
    }

    fn build(&self) -> Bins<T> {
        self.builder.build()
    }

    fn n_bins(&self) -> usize {
        self.builder.n_bins()
    }
}

impl<T> FreedmanDiaconis<T>
where
    T: Ord + Clone + FromPrimitive + NumOps + Zero,
{
    /// Computes `2 * iqr / n_points^(1/3)`, with the cube root converted to `T` before
    /// dividing.
    ///
    /// **Panics** if `2` or the cube root of `n_points` cannot be converted to `T`.
    fn compute_bin_width(n_points: usize, iqr: T) -> T {
        // casting `n_points: usize` to `f64` may cause an off-by-one error here if
        // `n_points` > 2 ^ 53, but it's not relevant here
        #[allow(clippy::cast_precision_loss)]
        let denominator = (n_points as f64).powf(1. / 3.);
        T::from_usize(2).unwrap() * iqr / T::from_f64(denominator).unwrap()
    }

    /// The bin width (or bin length) according to the fitted strategy.
    pub fn bin_width(&self) -> T {
        self.builder.bin_width()
    }
}

impl<T> BinsBuildingStrategy for Auto<T>
where
    T: Ord + Clone + FromPrimitive + NumOps + Zero,
{
    type Elem = T;

    /// Returns `Err(BinsBuildError::Strategy)` if the array is constant.
    /// Returns `Err(BinsBuildError::EmptyInput)` if `a.len()==0`.
    /// Returns `Ok(Self)` otherwise; in particular a zero `IQR` is not an error, because the
    /// Sturges strategy is used whenever Freedman-Diaconis cannot be fitted.
fn from_array(a: &ArrayRef) -> Result { let fd_builder = FreedmanDiaconis::from_array(&a); let sturges_builder = Sturges::from_array(&a); match (fd_builder, sturges_builder) { (Err(_), Ok(sturges_builder)) => { let builder = SturgesOrFD::Sturges(sturges_builder); Ok(Self { builder }) } (Ok(fd_builder), Err(_)) => { let builder = SturgesOrFD::FreedmanDiaconis(fd_builder); Ok(Self { builder }) } (Ok(fd_builder), Ok(sturges_builder)) => { let builder = if fd_builder.bin_width() > sturges_builder.bin_width() { SturgesOrFD::Sturges(sturges_builder) } else { SturgesOrFD::FreedmanDiaconis(fd_builder) }; Ok(Self { builder }) } (Err(err), Err(_)) => Err(err), } } fn build(&self) -> Bins { // Ugly match &self.builder { SturgesOrFD::FreedmanDiaconis(b) => b.build(), SturgesOrFD::Sturges(b) => b.build(), } } fn n_bins(&self) -> usize { // Ugly match &self.builder { SturgesOrFD::FreedmanDiaconis(b) => b.n_bins(), SturgesOrFD::Sturges(b) => b.n_bins(), } } } impl Auto where T: Ord + Clone + FromPrimitive + NumOps + Zero, { /// The bin width (or bin length) according to the fitted strategy. pub fn bin_width(&self) -> T { // Ugly match &self.builder { SturgesOrFD::FreedmanDiaconis(b) => b.bin_width(), SturgesOrFD::Sturges(b) => b.bin_width(), } } } /// Returns the `bin_width`, given the two end points of a range (`max`, `min`), and the number of /// bins, consuming endpoints /// /// `bin_width = (max - min)/n` /// /// **Panics** if `n_bins == 0` and division by 0 panics for `T`. 
fn compute_bin_width(min: T, max: T, n_bins: usize) -> T where T: Ord + Clone + FromPrimitive + NumOps + Zero, { let range = max - min; range / T::from_usize(n_bins).unwrap() } #[cfg(test)] mod equispaced_tests { use super::EquiSpaced; #[test] fn bin_width_has_to_be_positive() { assert!(EquiSpaced::new(0, 0, 200).is_err()); } #[test] fn min_has_to_be_strictly_smaller_than_max() { assert!(EquiSpaced::new(10, 0, 0).is_err()); } } #[cfg(test)] mod sqrt_tests { use super::{BinsBuildingStrategy, Sqrt}; use ndarray::array; #[test] fn constant_array_are_bad() { assert!(Sqrt::from_array(&array![1, 1, 1, 1, 1, 1, 1]) .unwrap_err() .is_strategy()); } #[test] fn empty_arrays_are_bad() { assert!(Sqrt::::from_array(&array![]) .unwrap_err() .is_empty_input()); } } #[cfg(test)] mod rice_tests { use super::{BinsBuildingStrategy, Rice}; use ndarray::array; #[test] fn constant_array_are_bad() { assert!(Rice::from_array(&array![1, 1, 1, 1, 1, 1, 1]) .unwrap_err() .is_strategy()); } #[test] fn empty_arrays_are_bad() { assert!(Rice::::from_array(&array![]) .unwrap_err() .is_empty_input()); } } #[cfg(test)] mod sturges_tests { use super::{BinsBuildingStrategy, Sturges}; use ndarray::array; #[test] fn constant_array_are_bad() { assert!(Sturges::from_array(&array![1, 1, 1, 1, 1, 1, 1]) .unwrap_err() .is_strategy()); } #[test] fn empty_arrays_are_bad() { assert!(Sturges::::from_array(&array![]) .unwrap_err() .is_empty_input()); } } #[cfg(test)] mod fd_tests { use super::{BinsBuildingStrategy, FreedmanDiaconis}; use ndarray::array; #[test] fn constant_array_are_bad() { assert!(FreedmanDiaconis::from_array(&array![1, 1, 1, 1, 1, 1, 1]) .unwrap_err() .is_strategy()); } #[test] fn zero_iqr_is_bad() { assert!( FreedmanDiaconis::from_array(&array![-20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20]) .unwrap_err() .is_strategy() ); } #[test] fn empty_arrays_are_bad() { assert!(FreedmanDiaconis::::from_array(&array![]) .unwrap_err() .is_empty_input()); } } #[cfg(test)] mod auto_tests { use super::{Auto, 
BinsBuildingStrategy}; use ndarray::array; #[test] fn constant_array_are_bad() { assert!(Auto::from_array(&array![1, 1, 1, 1, 1, 1, 1]) .unwrap_err() .is_strategy()); } #[test] fn zero_iqr_is_handled_by_sturged() { assert!(Auto::from_array(&array![-20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20]).is_ok()); } #[test] fn empty_arrays_are_bad() { assert!(Auto::::from_array(&array![]) .unwrap_err() .is_empty_input()); } } ================================================ FILE: src/lib.rs ================================================ //! The [`ndarray-stats`] crate exposes statistical routines for `ArrayRef`, //! the *n*-dimensional array data structure provided by [`ndarray`]. //! //! Currently available routines include: //! - [order statistics] (minimum, maximum, median, quantiles, etc.); //! - [summary statistics] (mean, skewness, kurtosis, central moments, etc.) //! - [partitioning]; //! - [correlation analysis] (covariance, pearson correlation); //! - [measures from information theory] (entropy, KL divergence, etc.); //! - [measures of deviation] (count equal, L1, L2 distances, mean squared err etc.) //! - [histogram computation]. //! //! Please feel free to contribute new functionality! A roadmap can be found [here]. //! //! Our work is inspired by other existing statistical packages such as //! [`NumPy`] (Python) and [`StatsBase.jl`] (Julia) - any contribution bringing us closer to //! feature parity is more than welcome! //! //! [`ndarray-stats`]: https://github.com/rust-ndarray/ndarray-stats/ //! [`ndarray`]: https://github.com/rust-ndarray/ndarray //! [order statistics]: trait.QuantileExt.html //! [partitioning]: trait.Sort1dExt.html //! [summary statistics]: trait.SummaryStatisticsExt.html //! [correlation analysis]: trait.CorrelationExt.html //! [measures of deviation]: trait.DeviationExt.html //! [measures from information theory]: trait.EntropyExt.html //! [histogram computation]: histogram/index.html //! 
[here]: https://github.com/rust-ndarray/ndarray-stats/issues/1 //! [`NumPy`]: https://docs.scipy.org/doc/numpy-1.14.1/reference/routines.statistics.html //! [`StatsBase.jl`]: https://juliastats.github.io/StatsBase.jl/latest/ pub use crate::correlation::CorrelationExt; pub use crate::deviation::DeviationExt; pub use crate::entropy::EntropyExt; pub use crate::histogram::HistogramExt; pub use crate::maybe_nan::{MaybeNan, MaybeNanExt}; pub use crate::quantile::{interpolate, Quantile1dExt, QuantileExt}; pub use crate::sort::Sort1dExt; pub use crate::summary_statistics::SummaryStatisticsExt; #[cfg(test)] #[macro_use] extern crate approx; #[macro_use] mod multi_input_error_macros { macro_rules! return_err_if_empty { ($arr:expr) => { if $arr.len() == 0 { return Err(MultiInputError::EmptyInput); } }; } macro_rules! return_err_unless_same_shape { ($arr_a:expr, $arr_b:expr) => { use crate::errors::{MultiInputError, ShapeMismatch}; if $arr_a.shape() != $arr_b.shape() { return Err(MultiInputError::ShapeMismatch(ShapeMismatch { first_shape: $arr_a.shape().to_vec(), second_shape: $arr_b.shape().to_vec(), }) .into()); } }; } } #[macro_use] mod private { /// This is a public type in a private module, so it can be included in /// public APIs, but other crates can't access it. pub struct PrivateMarker; /// Defines an associated function for a trait that is impossible for other /// crates to implement. This makes it possible to add new associated /// types/functions/consts/etc. to the trait without breaking changes. macro_rules! private_decl { () => { /// This method makes this trait impossible to implement outside of /// `ndarray-stats` so that we can freely add new methods, etc., to /// this trait without breaking changes. /// /// We don't anticipate any other crates needing to implement this /// trait, but if you do have such a use-case, please let us know. 
/// /// **Warning** This method is not considered part of the public /// API, and client code should not rely on it being present. It /// may be removed in a non-breaking release. fn __private__(&self, _: crate::private::PrivateMarker); }; } /// Implements the associated function defined by `private_decl!`. macro_rules! private_impl { () => { fn __private__(&self, _: crate::private::PrivateMarker) {} }; } } mod correlation; mod deviation; mod entropy; pub mod errors; pub mod histogram; mod maybe_nan; mod quantile; mod sort; mod summary_statistics; ================================================ FILE: src/maybe_nan/impl_not_none.rs ================================================ use super::NotNone; use num_traits::{FromPrimitive, ToPrimitive}; use std::cmp; use std::fmt; use std::ops::{Add, Deref, DerefMut, Div, Mul, Rem, Sub}; impl Deref for NotNone { type Target = T; fn deref(&self) -> &T { match self.0 { Some(ref inner) => inner, None => unsafe { ::std::hint::unreachable_unchecked() }, } } } impl DerefMut for NotNone { fn deref_mut(&mut self) -> &mut T { match self.0 { Some(ref mut inner) => inner, None => unsafe { ::std::hint::unreachable_unchecked() }, } } } impl fmt::Display for NotNone { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { self.deref().fmt(f) } } impl Eq for NotNone {} impl PartialEq for NotNone { fn eq(&self, other: &Self) -> bool { self.deref().eq(other) } } impl Ord for NotNone { fn cmp(&self, other: &Self) -> cmp::Ordering { self.deref().cmp(other) } } impl PartialOrd for NotNone { fn partial_cmp(&self, other: &Self) -> Option { self.deref().partial_cmp(other) } fn lt(&self, other: &Self) -> bool { self.deref().lt(other) } fn le(&self, other: &Self) -> bool { self.deref().le(other) } fn gt(&self, other: &Self) -> bool { self.deref().gt(other) } fn ge(&self, other: &Self) -> bool { self.deref().ge(other) } } impl Add for NotNone { type Output = NotNone; #[inline] fn add(self, rhs: Self) -> Self::Output { self.map(|v| 
v.add(rhs.unwrap())) } } impl Sub for NotNone { type Output = NotNone; #[inline] fn sub(self, rhs: Self) -> Self::Output { self.map(|v| v.sub(rhs.unwrap())) } } impl Mul for NotNone { type Output = NotNone; #[inline] fn mul(self, rhs: Self) -> Self::Output { self.map(|v| v.mul(rhs.unwrap())) } } impl Div for NotNone { type Output = NotNone; #[inline] fn div(self, rhs: Self) -> Self::Output { self.map(|v| v.div(rhs.unwrap())) } } impl Rem for NotNone { type Output = NotNone; #[inline] fn rem(self, rhs: Self) -> Self::Output { self.map(|v| v.rem(rhs.unwrap())) } } impl ToPrimitive for NotNone { #[inline] fn to_isize(&self) -> Option { self.deref().to_isize() } #[inline] fn to_i8(&self) -> Option { self.deref().to_i8() } #[inline] fn to_i16(&self) -> Option { self.deref().to_i16() } #[inline] fn to_i32(&self) -> Option { self.deref().to_i32() } #[inline] fn to_i64(&self) -> Option { self.deref().to_i64() } #[inline] fn to_i128(&self) -> Option { self.deref().to_i128() } #[inline] fn to_usize(&self) -> Option { self.deref().to_usize() } #[inline] fn to_u8(&self) -> Option { self.deref().to_u8() } #[inline] fn to_u16(&self) -> Option { self.deref().to_u16() } #[inline] fn to_u32(&self) -> Option { self.deref().to_u32() } #[inline] fn to_u64(&self) -> Option { self.deref().to_u64() } #[inline] fn to_u128(&self) -> Option { self.deref().to_u128() } #[inline] fn to_f32(&self) -> Option { self.deref().to_f32() } #[inline] fn to_f64(&self) -> Option { self.deref().to_f64() } } impl FromPrimitive for NotNone { #[inline] fn from_isize(n: isize) -> Option { Self::try_new(T::from_isize(n)) } #[inline] fn from_i8(n: i8) -> Option { Self::try_new(T::from_i8(n)) } #[inline] fn from_i16(n: i16) -> Option { Self::try_new(T::from_i16(n)) } #[inline] fn from_i32(n: i32) -> Option { Self::try_new(T::from_i32(n)) } #[inline] fn from_i64(n: i64) -> Option { Self::try_new(T::from_i64(n)) } #[inline] fn from_i128(n: i128) -> Option { Self::try_new(T::from_i128(n)) } #[inline] fn 
from_usize(n: usize) -> Option { Self::try_new(T::from_usize(n)) } #[inline] fn from_u8(n: u8) -> Option { Self::try_new(T::from_u8(n)) } #[inline] fn from_u16(n: u16) -> Option { Self::try_new(T::from_u16(n)) } #[inline] fn from_u32(n: u32) -> Option { Self::try_new(T::from_u32(n)) } #[inline] fn from_u64(n: u64) -> Option { Self::try_new(T::from_u64(n)) } #[inline] fn from_u128(n: u128) -> Option { Self::try_new(T::from_u128(n)) } #[inline] fn from_f32(n: f32) -> Option { Self::try_new(T::from_f32(n)) } #[inline] fn from_f64(n: f64) -> Option { Self::try_new(T::from_f64(n)) } } ================================================ FILE: src/maybe_nan/mod.rs ================================================ use ndarray::prelude::*; use ndarray::{s, RemoveAxis}; use noisy_float::types::{N32, N64}; use std::mem; /// A number type that can have not-a-number values. pub trait MaybeNan: Sized { /// A type that is guaranteed not to be a NaN value. type NotNan; /// Returns `true` if the value is a NaN value. fn is_nan(&self) -> bool; /// Tries to convert the value to `NotNan`. /// /// Returns `None` if the value is a NaN value. fn try_as_not_nan(&self) -> Option<&Self::NotNan>; /// Converts the value. /// /// If the value is `None`, a NaN value is returned. fn from_not_nan(_: Self::NotNan) -> Self; /// Converts the value. /// /// If the value is `None`, a NaN value is returned. fn from_not_nan_opt(_: Option) -> Self; /// Converts the value. /// /// If the value is `None`, a NaN value is returned. fn from_not_nan_ref_opt(_: Option<&Self::NotNan>) -> &Self; /// Returns a view with the NaN values removed. /// /// This modifies the input view by moving elements as necessary. The final /// order of the elements is unspecified. However, this method is /// idempotent, and given the same input data, the result is always ordered /// the same way. fn remove_nan_mut(_: ArrayViewMut1<'_, Self>) -> ArrayViewMut1<'_, Self::NotNan>; } /// Returns a view with the NaN values removed. 
/// /// This modifies the input view by moving elements as necessary. fn remove_nan_mut(mut view: ArrayViewMut1<'_, A>) -> ArrayViewMut1<'_, A> { if view.is_empty() { return view.slice_move(s![..0]); } let mut i = 0; let mut j = view.len() - 1; loop { // At this point, `i == 0 || !view[i-1].is_nan()` // and `j == view.len() - 1 || view[j+1].is_nan()`. while i <= j && !view[i].is_nan() { i += 1; } // At this point, `view[i].is_nan() || i == j + 1`. while j > i && view[j].is_nan() { j -= 1; } // At this point, `!view[j].is_nan() || j == i`. if i >= j { return view.slice_move(s![..i]); } else { view.swap(i, j); i += 1; j -= 1; } } } /// Casts a view from one element type to another. /// /// # Panics /// /// Panics if `T` and `U` differ in size or alignment. /// /// # Safety /// /// The caller must ensure that qll elements in `view` are valid values for type `U`. unsafe fn cast_view_mut(mut view: ArrayViewMut1<'_, T>) -> ArrayViewMut1<'_, U> { assert_eq!(mem::size_of::(), mem::size_of::()); assert_eq!(mem::align_of::(), mem::align_of::()); let ptr: *mut U = view.as_mut_ptr().cast(); let len: usize = view.len_of(Axis(0)); let stride: isize = view.stride_of(Axis(0)); if len <= 1 { // We can use a stride of `0` because the stride is irrelevant for the `len == 1` case. let stride = 0; ArrayViewMut1::from_shape_ptr([len].strides([stride]), ptr) } else if stride >= 0 { let stride = stride as usize; ArrayViewMut1::from_shape_ptr([len].strides([stride]), ptr) } else { // At this point, stride < 0. We have to construct the view by using the inverse of the // stride and then inverting the axis, since `ArrayViewMut::from_shape_ptr` requires the // stride to be nonnegative. let neg_stride = stride.checked_neg().unwrap() as usize; // This is safe because `ndarray` guarantees that it's safe to offset the // pointer anywhere in the array. 
let neg_ptr = ptr.offset((len - 1) as isize * stride); let mut v = ArrayViewMut1::from_shape_ptr([len].strides([neg_stride]), neg_ptr); v.invert_axis(Axis(0)); v } } macro_rules! impl_maybenan_for_fxx { ($fxx:ident, $Nxx:ident) => { impl MaybeNan for $fxx { type NotNan = $Nxx; fn is_nan(&self) -> bool { $fxx::is_nan(*self) } fn try_as_not_nan(&self) -> Option<&$Nxx> { $Nxx::try_borrowed(self) } fn from_not_nan(value: $Nxx) -> $fxx { value.raw() } fn from_not_nan_opt(value: Option<$Nxx>) -> $fxx { match value { None => ::std::$fxx::NAN, Some(num) => num.raw(), } } fn from_not_nan_ref_opt(value: Option<&$Nxx>) -> &$fxx { match value { None => &::std::$fxx::NAN, Some(num) => num.as_ref(), } } fn remove_nan_mut(view: ArrayViewMut1<'_, $fxx>) -> ArrayViewMut1<'_, $Nxx> { let not_nan = remove_nan_mut(view); // This is safe because `remove_nan_mut` has removed the NaN values, and `$Nxx` is // a thin wrapper around `$fxx`. unsafe { cast_view_mut(not_nan) } } } }; } impl_maybenan_for_fxx!(f32, N32); impl_maybenan_for_fxx!(f64, N64); macro_rules! impl_maybenan_for_opt_never_nan { ($ty:ty) => { impl MaybeNan for Option<$ty> { type NotNan = NotNone<$ty>; fn is_nan(&self) -> bool { self.is_none() } fn try_as_not_nan(&self) -> Option<&NotNone<$ty>> { if self.is_none() { None } else { // This is safe because we have checked for the `None` // case, and `NotNone<$ty>` is a thin wrapper around `Option<$ty>`. Some(unsafe { &*(self as *const Option<$ty> as *const NotNone<$ty>) }) } } fn from_not_nan(value: NotNone<$ty>) -> Option<$ty> { value.into_inner() } fn from_not_nan_opt(value: Option>) -> Option<$ty> { value.and_then(|v| v.into_inner()) } fn from_not_nan_ref_opt(value: Option<&NotNone<$ty>>) -> &Option<$ty> { match value { None => &None, // This is safe because `NotNone<$ty>` is a thin wrapper around // `Option<$ty>`. 
Some(num) => unsafe { &*(num as *const NotNone<$ty> as *const Option<$ty>) }, } } fn remove_nan_mut(view: ArrayViewMut1<'_, Self>) -> ArrayViewMut1<'_, Self::NotNan> { let not_nan = remove_nan_mut(view); // This is safe because `remove_nan_mut` has removed the `None` // values, and `NotNone<$ty>` is a thin wrapper around `Option<$ty>`. unsafe { ArrayViewMut1::from_shape_ptr( not_nan.dim(), not_nan.as_ptr() as *mut NotNone<$ty>, ) } } } }; } impl_maybenan_for_opt_never_nan!(u8); impl_maybenan_for_opt_never_nan!(u16); impl_maybenan_for_opt_never_nan!(u32); impl_maybenan_for_opt_never_nan!(u64); impl_maybenan_for_opt_never_nan!(u128); impl_maybenan_for_opt_never_nan!(i8); impl_maybenan_for_opt_never_nan!(i16); impl_maybenan_for_opt_never_nan!(i32); impl_maybenan_for_opt_never_nan!(i64); impl_maybenan_for_opt_never_nan!(i128); impl_maybenan_for_opt_never_nan!(N32); impl_maybenan_for_opt_never_nan!(N64); /// A thin wrapper around `Option` that guarantees that the value is not /// `None`. #[derive(Clone, Copy, Debug)] #[repr(transparent)] pub struct NotNone(Option); impl NotNone { /// Creates a new `NotNone` containing the given value. pub fn new(value: T) -> NotNone { NotNone(Some(value)) } /// Creates a new `NotNone` containing the given value. /// /// Returns `None` if `value` is `None`. pub fn try_new(value: Option) -> Option> { if value.is_some() { Some(NotNone(value)) } else { None } } /// Returns the underling option. pub fn into_inner(self) -> Option { self.0 } /// Moves the value out of the inner option. /// /// This method is guaranteed not to panic. pub fn unwrap(self) -> T { match self.0 { Some(inner) => inner, None => unsafe { ::std::hint::unreachable_unchecked() }, } } /// Maps an `NotNone` to `NotNone` by applying a function to the /// contained value. pub fn map(self, f: F) -> NotNone where F: FnOnce(T) -> U, { NotNone::new(f(self.unwrap())) } } /// Extension trait for `ArrayRef` providing NaN-related functionality. 
pub trait MaybeNanExt where A: MaybeNan, D: Dimension, { /// Traverse the non-NaN array elements and apply a fold, returning the /// resulting value. /// /// Elements are visited in arbitrary order. fn fold_skipnan<'a, F, B>(&'a self, init: B, f: F) -> B where A: 'a, F: FnMut(B, &'a A::NotNan) -> B; /// Traverse the non-NaN elements and their indices and apply a fold, /// returning the resulting value. /// /// Elements are visited in arbitrary order. fn indexed_fold_skipnan<'a, F, B>(&'a self, init: B, f: F) -> B where A: 'a, F: FnMut(B, (D::Pattern, &'a A::NotNan)) -> B; /// Visit each non-NaN element in the array by calling `f` on each element. /// /// Elements are visited in arbitrary order. fn visit_skipnan<'a, F>(&'a self, f: F) where A: 'a, F: FnMut(&'a A::NotNan); /// Fold non-NaN values along an axis. /// /// Combine the non-NaN elements of each subview with the previous using /// the fold function and initial value init. fn fold_axis_skipnan(&self, axis: Axis, init: B, fold: F) -> Array where D: RemoveAxis, F: FnMut(&B, &A::NotNan) -> B, B: Clone; /// Reduce the values along an axis into just one value, producing a new /// array with one less dimension. /// /// The NaN values are removed from the 1-dimensional lanes, then they are /// passed as mutable views to the reducer, allowing for side-effects. /// /// **Warnings**: /// /// * The lanes are visited in arbitrary order. /// /// * The order of the elements within the lanes is unspecified. However, /// if `mapping` is idempotent, this method is idempotent. Additionally, /// given the same input data, the lane is always ordered the same way. /// /// **Panics** if `axis` is out of bounds. fn map_axis_skipnan_mut<'a, B, F>(&'a mut self, axis: Axis, mapping: F) -> Array where A: 'a, D: RemoveAxis, F: FnMut(ArrayViewMut1<'a, A::NotNan>) -> B; private_decl! 
{} } impl MaybeNanExt for ArrayRef where A: MaybeNan, D: Dimension, { fn fold_skipnan<'a, F, B>(&'a self, init: B, mut f: F) -> B where A: 'a, F: FnMut(B, &'a A::NotNan) -> B, { self.fold(init, |acc, elem| { if let Some(not_nan) = elem.try_as_not_nan() { f(acc, not_nan) } else { acc } }) } fn indexed_fold_skipnan<'a, F, B>(&'a self, init: B, mut f: F) -> B where A: 'a, F: FnMut(B, (D::Pattern, &'a A::NotNan)) -> B, { self.indexed_iter().fold(init, |acc, (idx, elem)| { if let Some(not_nan) = elem.try_as_not_nan() { f(acc, (idx, not_nan)) } else { acc } }) } fn visit_skipnan<'a, F>(&'a self, mut f: F) where A: 'a, F: FnMut(&'a A::NotNan), { self.for_each(|elem| { if let Some(not_nan) = elem.try_as_not_nan() { f(not_nan) } }) } fn fold_axis_skipnan(&self, axis: Axis, init: B, mut fold: F) -> Array where D: RemoveAxis, F: FnMut(&B, &A::NotNan) -> B, B: Clone, { self.fold_axis(axis, init, |acc, elem| { if let Some(not_nan) = elem.try_as_not_nan() { fold(acc, not_nan) } else { acc.clone() } }) } fn map_axis_skipnan_mut<'a, B, F>( &'a mut self, axis: Axis, mut mapping: F, ) -> Array where A: 'a, D: RemoveAxis, F: FnMut(ArrayViewMut1<'a, A::NotNan>) -> B, { self.map_axis_mut(axis, |lane| mapping(A::remove_nan_mut(lane))) } private_impl! 
{} } #[cfg(test)] mod tests { use super::*; use quickcheck_macros::quickcheck; #[quickcheck] fn remove_nan_mut_idempotent(is_nan: Vec) -> bool { let mut values: Vec<_> = is_nan .into_iter() .map(|is_nan| if is_nan { None } else { Some(1) }) .collect(); let view = ArrayViewMut1::from_shape(values.len(), &mut values).unwrap(); let removed = remove_nan_mut(view); removed == remove_nan_mut(removed.to_owned().view_mut()) } #[quickcheck] fn remove_nan_mut_only_nan_remaining(is_nan: Vec) -> bool { let mut values: Vec<_> = is_nan .into_iter() .map(|is_nan| if is_nan { None } else { Some(1) }) .collect(); let view = ArrayViewMut1::from_shape(values.len(), &mut values).unwrap(); remove_nan_mut(view).iter().all(|elem| !elem.is_nan()) } #[quickcheck] fn remove_nan_mut_keep_all_non_nan(is_nan: Vec) -> bool { let non_nan_count = is_nan.iter().filter(|&&is_nan| !is_nan).count(); let mut values: Vec<_> = is_nan .into_iter() .map(|is_nan| if is_nan { None } else { Some(1) }) .collect(); let view = ArrayViewMut1::from_shape(values.len(), &mut values).unwrap(); remove_nan_mut(view).len() == non_nan_count } } mod impl_not_none; ================================================ FILE: src/quantile/interpolate.rs ================================================ //! Interpolation strategies. use noisy_float::types::N64; use num_traits::{Float, FromPrimitive, NumOps, ToPrimitive}; fn float_quantile_index(q: N64, len: usize) -> N64 { q * ((len - 1) as f64) } /// Returns the fraction that the quantile is between the lower and higher indices. /// /// This ranges from 0, where the quantile exactly corresponds the lower index, /// to 1, where the quantile exactly corresponds to the higher index. fn float_quantile_index_fraction(q: N64, len: usize) -> N64 { float_quantile_index(q, len).fract() } /// Returns the index of the value on the lower side of the quantile. 
pub(crate) fn lower_index(q: N64, len: usize) -> usize { float_quantile_index(q, len).floor().to_usize().unwrap() } /// Returns the index of the value on the higher side of the quantile. pub(crate) fn higher_index(q: N64, len: usize) -> usize { float_quantile_index(q, len).ceil().to_usize().unwrap() } /// Used to provide an interpolation strategy to [`quantile_axis_mut`]. /// /// [`quantile_axis_mut`]: ../trait.QuantileExt.html#tymethod.quantile_axis_mut pub trait Interpolate { /// Returns `true` iff the lower value is needed to compute the /// interpolated value. #[doc(hidden)] fn needs_lower(q: N64, len: usize) -> bool; /// Returns `true` iff the higher value is needed to compute the /// interpolated value. #[doc(hidden)] fn needs_higher(q: N64, len: usize) -> bool; /// Computes the interpolated value. /// /// **Panics** if `None` is provided for the lower value when it's needed /// or if `None` is provided for the higher value when it's needed. #[doc(hidden)] fn interpolate(lower: Option, higher: Option, q: N64, len: usize) -> T; private_decl! {} } /// Select the higher value. pub struct Higher; /// Select the lower value. pub struct Lower; /// Select the nearest value. pub struct Nearest; /// Select the midpoint of the two values (`(lower + higher) / 2`). pub struct Midpoint; /// Linearly interpolate between the two values /// (`lower + (higher - lower) * fraction`, where `fraction` is the /// fractional part of the index surrounded by `lower` and `higher`). pub struct Linear; impl Interpolate for Higher { fn needs_lower(_q: N64, _len: usize) -> bool { false } fn needs_higher(_q: N64, _len: usize) -> bool { true } fn interpolate(_lower: Option, higher: Option, _q: N64, _len: usize) -> T { higher.unwrap() } private_impl! 
{} } impl Interpolate for Lower { fn needs_lower(_q: N64, _len: usize) -> bool { true } fn needs_higher(_q: N64, _len: usize) -> bool { false } fn interpolate(lower: Option, _higher: Option, _q: N64, _len: usize) -> T { lower.unwrap() } private_impl! {} } impl Interpolate for Nearest { fn needs_lower(q: N64, len: usize) -> bool { float_quantile_index_fraction(q, len) < 0.5 } fn needs_higher(q: N64, len: usize) -> bool { !>::needs_lower(q, len) } fn interpolate(lower: Option, higher: Option, q: N64, len: usize) -> T { if >::needs_lower(q, len) { lower.unwrap() } else { higher.unwrap() } } private_impl! {} } impl Interpolate for Midpoint where T: NumOps + Clone + FromPrimitive, { fn needs_lower(_q: N64, _len: usize) -> bool { true } fn needs_higher(_q: N64, _len: usize) -> bool { true } fn interpolate(lower: Option, higher: Option, _q: N64, _len: usize) -> T { let denom = T::from_u8(2).unwrap(); let lower = lower.unwrap(); let higher = higher.unwrap(); lower.clone() + (higher.clone() - lower.clone()) / denom.clone() } private_impl! {} } impl Interpolate for Linear where T: NumOps + Clone + FromPrimitive + ToPrimitive, { fn needs_lower(_q: N64, _len: usize) -> bool { true } fn needs_higher(_q: N64, _len: usize) -> bool { true } fn interpolate(lower: Option, higher: Option, q: N64, len: usize) -> T { let fraction = float_quantile_index_fraction(q, len).to_f64().unwrap(); let lower = lower.unwrap(); let higher = higher.unwrap(); let lower_f64 = lower.to_f64().unwrap(); let higher_f64 = higher.to_f64().unwrap(); lower.clone() + T::from_f64(fraction * (higher_f64 - lower_f64)).unwrap() } private_impl! 
{} } ================================================ FILE: src/quantile/mod.rs ================================================ use self::interpolate::{higher_index, lower_index, Interpolate}; use super::sort::get_many_from_sorted_mut_unchecked; use crate::errors::QuantileError; use crate::errors::{EmptyInput, MinMaxError, MinMaxError::UndefinedOrder}; use crate::{MaybeNan, MaybeNanExt}; use ndarray::prelude::*; use ndarray::{RemoveAxis, Zip}; use noisy_float::types::N64; use std::cmp; /// Quantile methods for `ArrayRef`. pub trait QuantileExt where D: Dimension, { /// Finds the index of the minimum value of the array. /// /// Returns `Err(MinMaxError::UndefinedOrder)` if any of the pairwise /// orderings tested by the function are undefined. (For example, this /// occurs if there are any floating-point NaN values in the array.) /// /// Returns `Err(MinMaxError::EmptyInput)` if the array is empty. /// /// Even if there are multiple (equal) elements that are minima, only one /// index is returned. (Which one is returned is unspecified and may depend /// on the memory layout of the array.) /// /// # Example /// /// ``` /// use ndarray::array; /// use ndarray_stats::QuantileExt; /// /// let a = array![[1., 3., 5.], /// [2., 0., 6.]]; /// assert_eq!(a.argmin(), Ok((1, 1))); /// ``` fn argmin(&self) -> Result where A: PartialOrd; /// Finds the index of the minimum value of the array skipping NaN values. /// /// Returns `Err(EmptyInput)` if the array is empty or none of the values in the array /// are non-NaN values. /// /// Even if there are multiple (equal) elements that are minima, only one /// index is returned. (Which one is returned is unspecified and may depend /// on the memory layout of the array.) 
/// /// # Example /// /// ``` /// use ndarray::array; /// use ndarray_stats::QuantileExt; /// /// let a = array![[::std::f64::NAN, 3., 5.], /// [2., 0., 6.]]; /// assert_eq!(a.argmin_skipnan(), Ok((1, 1))); /// ``` fn argmin_skipnan(&self) -> Result where A: MaybeNan, A::NotNan: Ord; /// Finds the elementwise minimum of the array. /// /// Returns `Err(MinMaxError::UndefinedOrder)` if any of the pairwise /// orderings tested by the function are undefined. (For example, this /// occurs if there are any floating-point NaN values in the array.) /// /// Returns `Err(MinMaxError::EmptyInput)` if the array is empty. /// /// Even if there are multiple (equal) elements that are minima, only one /// is returned. (Which one is returned is unspecified and may depend on /// the memory layout of the array.) fn min(&self) -> Result<&A, MinMaxError> where A: PartialOrd; /// Finds the elementwise minimum of the array, skipping NaN values. /// /// Even if there are multiple (equal) elements that are minima, only one /// is returned. (Which one is returned is unspecified and may depend on /// the memory layout of the array.) /// /// **Warning** This method will return a NaN value if none of the values /// in the array are non-NaN values. Note that the NaN value might not be /// in the array. fn min_skipnan(&self) -> &A where A: MaybeNan, A::NotNan: Ord; /// Finds the index of the maximum value of the array. /// /// Returns `Err(MinMaxError::UndefinedOrder)` if any of the pairwise /// orderings tested by the function are undefined. (For example, this /// occurs if there are any floating-point NaN values in the array.) /// /// Returns `Err(MinMaxError::EmptyInput)` if the array is empty. /// /// Even if there are multiple (equal) elements that are maxima, only one /// index is returned. (Which one is returned is unspecified and may depend /// on the memory layout of the array.) 
/// /// # Example /// /// ``` /// use ndarray::array; /// use ndarray_stats::QuantileExt; /// /// let a = array![[1., 3., 7.], /// [2., 5., 6.]]; /// assert_eq!(a.argmax(), Ok((0, 2))); /// ``` fn argmax(&self) -> Result where A: PartialOrd; /// Finds the index of the maximum value of the array skipping NaN values. /// /// Returns `Err(EmptyInput)` if the array is empty or none of the values in the array /// are non-NaN values. /// /// Even if there are multiple (equal) elements that are maxima, only one /// index is returned. (Which one is returned is unspecified and may depend /// on the memory layout of the array.) /// /// # Example /// /// ``` /// use ndarray::array; /// use ndarray_stats::QuantileExt; /// /// let a = array![[::std::f64::NAN, 3., 5.], /// [2., 0., 6.]]; /// assert_eq!(a.argmax_skipnan(), Ok((1, 2))); /// ``` fn argmax_skipnan(&self) -> Result where A: MaybeNan, A::NotNan: Ord; /// Finds the elementwise maximum of the array. /// /// Returns `Err(MinMaxError::UndefinedOrder)` if any of the pairwise /// orderings tested by the function are undefined. (For example, this /// occurs if there are any floating-point NaN values in the array.) /// /// Returns `Err(MinMaxError::EmptyInput)` if the array is empty. /// /// Even if there are multiple (equal) elements that are maxima, only one /// is returned. (Which one is returned is unspecified and may depend on /// the memory layout of the array.) fn max(&self) -> Result<&A, MinMaxError> where A: PartialOrd; /// Finds the elementwise maximum of the array, skipping NaN values. /// /// Even if there are multiple (equal) elements that are maxima, only one /// is returned. (Which one is returned is unspecified and may depend on /// the memory layout of the array.) /// /// **Warning** This method will return a NaN value if none of the values /// in the array are non-NaN values. Note that the NaN value might not be /// in the array.
fn max_skipnan(&self) -> &A where A: MaybeNan, A::NotNan: Ord; /// Return the qth quantile of the data along the specified axis. /// /// `q` needs to be a float between 0 and 1, bounds included. /// The qth quantile for a 1-dimensional lane of length `N` is defined /// as the element that would be indexed as `(N-1)q` if the lane were to be sorted /// in increasing order. /// If `(N-1)q` is not an integer the desired quantile lies between /// two data points: we return the lower, nearest, higher or interpolated /// value depending on the `interpolate` strategy. /// /// Some examples: /// - `q=0.` returns the minimum along each 1-dimensional lane; /// - `q=0.5` returns the median along each 1-dimensional lane; /// - `q=1.` returns the maximum along each 1-dimensional lane. /// (`q=0` and `q=1` are considered improper quantiles) /// /// The array is shuffled **in place** along each 1-dimensional lane in /// order to produce the required quantile without allocating a copy /// of the original array. Each 1-dimensional lane is shuffled independently /// from the others. /// No assumptions should be made on the ordering of the array elements /// after this computation. /// /// Complexity ([quickselect](https://en.wikipedia.org/wiki/Quickselect)): /// - average case: O(`m`); /// - worst case: O(`m`^2); /// where `m` is the number of elements in the array. /// /// Returns `Err(EmptyInput)` when the specified axis has length 0. /// /// Returns `Err(InvalidQuantile(q))` if `q` is not between `0.` and `1.` (inclusive). /// /// **Panics** if `axis` is out of bounds. fn quantile_axis_mut( &mut self, axis: Axis, q: N64, interpolate: &I, ) -> Result, QuantileError> where D: RemoveAxis, A: Ord + Clone, I: Interpolate; /// A bulk version of [`quantile_axis_mut`], optimized to retrieve multiple /// quantiles at once. /// /// Returns an `Array`, where subviews along `axis` of the array correspond /// to the elements of `qs`. 
/// /// See [`quantile_axis_mut`] for additional details on quantiles and the algorithm /// used to retrieve them. /// /// Returns `Err(EmptyInput)` when the specified axis has length 0. /// /// Returns `Err(InvalidQuantile(q))` if any `q` in `qs` is not between `0.` and `1.` (inclusive). /// /// **Panics** if `axis` is out of bounds. /// /// [`quantile_axis_mut`]: #tymethod.quantile_axis_mut /// /// # Example /// /// ```rust /// use ndarray::{array, aview1, Axis}; /// use ndarray_stats::{QuantileExt, interpolate::Nearest}; /// use noisy_float::types::n64; /// /// let mut data = array![[3, 4, 5], [6, 7, 8]]; /// let axis = Axis(1); /// let qs = &[n64(0.3), n64(0.7)]; /// let quantiles = data.quantiles_axis_mut(axis, &aview1(qs), &Nearest).unwrap(); /// for (&q, quantile) in qs.iter().zip(quantiles.axis_iter(axis)) { /// assert_eq!(quantile, data.quantile_axis_mut(axis, q, &Nearest).unwrap()); /// } /// ``` fn quantiles_axis_mut( &mut self, axis: Axis, qs: &ArrayRef, interpolate: &I, ) -> Result, QuantileError> where D: RemoveAxis, A: Ord + Clone, I: Interpolate; /// Return the `q`th quantile of the data along the specified axis, skipping NaN values. /// /// See [`quantile_axis_mut`](#tymethod.quantile_axis_mut) for details. fn quantile_axis_skipnan_mut( &mut self, axis: Axis, q: N64, interpolate: &I, ) -> Result, QuantileError> where D: RemoveAxis, A: MaybeNan, A::NotNan: Clone + Ord, I: Interpolate; private_decl! {} } impl QuantileExt for ArrayRef where D: Dimension, { fn argmin(&self) -> Result where A: PartialOrd, { let mut current_min = self.first().ok_or(EmptyInput)?; let mut current_pattern_min = D::zeros(self.ndim()).into_pattern(); for (pattern, elem) in self.indexed_iter() { if elem.partial_cmp(current_min).ok_or(UndefinedOrder)? 
== cmp::Ordering::Less { current_pattern_min = pattern; current_min = elem } } Ok(current_pattern_min) } fn argmin_skipnan(&self) -> Result where A: MaybeNan, A::NotNan: Ord, { let mut pattern_min = D::zeros(self.ndim()).into_pattern(); let min = self.indexed_fold_skipnan(None, |current_min, (pattern, elem)| { Some(match current_min { Some(m) if (m <= elem) => m, _ => { pattern_min = pattern; elem } }) }); if min.is_some() { Ok(pattern_min) } else { Err(EmptyInput) } } fn min(&self) -> Result<&A, MinMaxError> where A: PartialOrd, { let first = self.first().ok_or(EmptyInput)?; self.fold(Ok(first), |acc, elem| { let acc = acc?; match elem.partial_cmp(acc).ok_or(UndefinedOrder)? { cmp::Ordering::Less => Ok(elem), _ => Ok(acc), } }) } fn min_skipnan(&self) -> &A where A: MaybeNan, A::NotNan: Ord, { let first = self.first().and_then(|v| v.try_as_not_nan()); A::from_not_nan_ref_opt(self.fold_skipnan(first, |acc, elem| { Some(match acc { Some(acc) => acc.min(elem), None => elem, }) })) } fn argmax(&self) -> Result where A: PartialOrd, { let mut current_max = self.first().ok_or(EmptyInput)?; let mut current_pattern_max = D::zeros(self.ndim()).into_pattern(); for (pattern, elem) in self.indexed_iter() { if elem.partial_cmp(current_max).ok_or(UndefinedOrder)? == cmp::Ordering::Greater { current_pattern_max = pattern; current_max = elem } } Ok(current_pattern_max) } fn argmax_skipnan(&self) -> Result where A: MaybeNan, A::NotNan: Ord, { let mut pattern_max = D::zeros(self.ndim()).into_pattern(); let max = self.indexed_fold_skipnan(None, |current_max, (pattern, elem)| { Some(match current_max { Some(m) if m >= elem => m, _ => { pattern_max = pattern; elem } }) }); if max.is_some() { Ok(pattern_max) } else { Err(EmptyInput) } } fn max(&self) -> Result<&A, MinMaxError> where A: PartialOrd, { let first = self.first().ok_or(EmptyInput)?; self.fold(Ok(first), |acc, elem| { let acc = acc?; match elem.partial_cmp(acc).ok_or(UndefinedOrder)? 
{ cmp::Ordering::Greater => Ok(elem), _ => Ok(acc), } }) } fn max_skipnan(&self) -> &A where A: MaybeNan, A::NotNan: Ord, { let first = self.first().and_then(|v| v.try_as_not_nan()); A::from_not_nan_ref_opt(self.fold_skipnan(first, |acc, elem| { Some(match acc { Some(acc) => acc.max(elem), None => elem, }) })) } fn quantiles_axis_mut( &mut self, axis: Axis, qs: &ArrayRef, interpolate: &I, ) -> Result, QuantileError> where D: RemoveAxis, A: Ord + Clone, I: Interpolate, { // Minimize number of type parameters to avoid monomorphization bloat. fn quantiles_axis_mut( mut data: ArrayViewMut<'_, A, D>, axis: Axis, qs: ArrayView1<'_, N64>, _interpolate: &I, ) -> Result, QuantileError> where D: RemoveAxis, A: Ord + Clone, I: Interpolate, { for &q in qs { if !((q >= 0.) && (q <= 1.)) { return Err(QuantileError::InvalidQuantile(q)); } } let axis_len = data.len_of(axis); if axis_len == 0 { return Err(QuantileError::EmptyInput); } let mut results_shape = data.raw_dim(); results_shape[axis.index()] = qs.len(); if results_shape.size() == 0 { return Ok(Array::from_shape_vec(results_shape, Vec::new()).unwrap()); } let mut searched_indexes = Vec::with_capacity(2 * qs.len()); for &q in &qs { if I::needs_lower(q, axis_len) { searched_indexes.push(lower_index(q, axis_len)); } if I::needs_higher(q, axis_len) { searched_indexes.push(higher_index(q, axis_len)); } } searched_indexes.sort(); searched_indexes.dedup(); let mut results = Array::from_elem(results_shape, data.first().unwrap().clone()); Zip::from(results.lanes_mut(axis)) .and(data.lanes_mut(axis)) .for_each(|mut results, mut data| { let index_map = get_many_from_sorted_mut_unchecked(&mut data, &searched_indexes); for (result, &q) in results.iter_mut().zip(qs) { let lower = if I::needs_lower(q, axis_len) { Some(index_map[&lower_index(q, axis_len)].clone()) } else { None }; let higher = if I::needs_higher(q, axis_len) { Some(index_map[&higher_index(q, axis_len)].clone()) } else { None }; *result = I::interpolate(lower, higher, q, 
axis_len); } }); Ok(results) } quantiles_axis_mut(self.view_mut(), axis, qs.view(), interpolate) } fn quantile_axis_mut( &mut self, axis: Axis, q: N64, interpolate: &I, ) -> Result, QuantileError> where D: RemoveAxis, A: Ord + Clone, I: Interpolate, { self.quantiles_axis_mut(axis, &aview1(&[q]), interpolate) .map(|a| a.index_axis_move(axis, 0)) } fn quantile_axis_skipnan_mut( &mut self, axis: Axis, q: N64, interpolate: &I, ) -> Result, QuantileError> where D: RemoveAxis, A: MaybeNan, A::NotNan: Clone + Ord, I: Interpolate, { if !((q >= 0.) && (q <= 1.)) { return Err(QuantileError::InvalidQuantile(q)); } if self.len_of(axis) == 0 { return Err(QuantileError::EmptyInput); } let quantile = self.map_axis_mut(axis, |lane| { let mut not_nan = A::remove_nan_mut(lane); A::from_not_nan_opt(if not_nan.is_empty() { None } else { Some( not_nan .quantile_axis_mut::(Axis(0), q, interpolate) .unwrap() .into_scalar(), ) }) }); Ok(quantile) } private_impl! {} } /// Quantile methods for 1-D arrays. pub trait Quantile1dExt { /// Return the qth quantile of the data. /// /// `q` needs to be a float between 0 and 1, bounds included. /// The qth quantile for a 1-dimensional array of length `N` is defined /// as the element that would be indexed as `(N-1)q` if the array were to be sorted /// in increasing order. /// If `(N-1)q` is not an integer the desired quantile lies between /// two data points: we return the lower, nearest, higher or interpolated /// value depending on the `interpolate` strategy. /// /// Some examples: /// - `q=0.` returns the minimum; /// - `q=0.5` returns the median; /// - `q=1.` returns the maximum. /// (`q=0` and `q=1` are considered improper quantiles) /// /// The array is shuffled **in place** in order to produce the required quantile /// without allocating a copy. /// No assumptions should be made on the ordering of the array elements /// after this computation. 
/// /// Complexity ([quickselect](https://en.wikipedia.org/wiki/Quickselect)): /// - average case: O(`m`); /// - worst case: O(`m`^2); /// where `m` is the number of elements in the array. /// /// Returns `Err(EmptyInput)` if the array is empty. /// /// Returns `Err(InvalidQuantile(q))` if `q` is not between `0.` and `1.` (inclusive). fn quantile_mut(&mut self, q: N64, interpolate: &I) -> Result where A: Ord + Clone, I: Interpolate; /// A bulk version of [`quantile_mut`], optimized to retrieve multiple /// quantiles at once. /// /// Returns an `Array`, where the elements of the array correspond to the /// elements of `qs`. /// /// Returns `Err(EmptyInput)` if the array is empty. /// /// Returns `Err(InvalidQuantile(q))` if any `q` in /// `qs` is not between `0.` and `1.` (inclusive). /// /// See [`quantile_mut`] for additional details on quantiles and the algorithm /// used to retrieve them. /// /// [`quantile_mut`]: #tymethod.quantile_mut fn quantiles_mut( &mut self, qs: &ArrayRef, interpolate: &I, ) -> Result, QuantileError> where A: Ord + Clone, I: Interpolate; private_decl! {} } impl Quantile1dExt for ArrayRef { fn quantile_mut(&mut self, q: N64, interpolate: &I) -> Result where A: Ord + Clone, I: Interpolate, { Ok(self .quantile_axis_mut(Axis(0), q, interpolate)? .into_scalar()) } fn quantiles_mut( &mut self, qs: &ArrayRef, interpolate: &I, ) -> Result, QuantileError> where A: Ord + Clone, I: Interpolate, { self.quantiles_axis_mut(Axis(0), qs, interpolate) } private_impl! {} } pub mod interpolate; ================================================ FILE: src/sort.rs ================================================ use indexmap::IndexMap; use ndarray::prelude::*; use ndarray::Slice; use rand::prelude::*; use rand::thread_rng; /// Methods for sorting and partitioning 1-D arrays. pub trait Sort1dExt { /// Return the element that would occupy the `i`-th position if /// the array were sorted in increasing order. 
/// /// The array is shuffled **in place** to retrieve the desired element: /// no copy of the array is allocated. /// After the shuffling, all elements with an index smaller than `i` /// are smaller than or equal to the desired element, while all elements with /// an index greater than or equal to `i` are greater than or equal /// to the desired element. /// /// No other assumptions should be made on the ordering of the /// elements after this computation. /// /// Complexity ([quickselect](https://en.wikipedia.org/wiki/Quickselect)): /// - average case: O(`n`); /// - worst case: O(`n`^2); /// where n is the number of elements in the array. /// /// **Panics** if `i` is greater than or equal to `n`. fn get_from_sorted_mut(&mut self, i: usize) -> A where A: Ord + Clone; /// A bulk version of [`get_from_sorted_mut`], optimized to retrieve multiple /// indexes at once. /// It returns an `IndexMap`, with indexes as keys and retrieved elements as /// values. /// The `IndexMap` is sorted with respect to indexes in increasing order: /// this ordering is preserved when you iterate over it (using `iter`/`into_iter`). /// /// **Panics** if any element in `indexes` is greater than or equal to `n`, /// where `n` is the length of the array. /// /// [`get_from_sorted_mut`]: #tymethod.get_from_sorted_mut fn get_many_from_sorted_mut(&mut self, indexes: &ArrayRef1) -> IndexMap where A: Ord + Clone; /// Partitions the array in increasing order based on the value initially /// located at `pivot_index` and returns the new index of the value. /// /// The elements are rearranged in such a way that the value initially /// located at `pivot_index` is moved to the position it would be in an /// array sorted in increasing order. The return value is the new index of /// the value after rearrangement. All elements smaller than the value are /// moved to its left and all elements equal or greater than the value are /// moved to its right. The ordering of the elements in the two partitions /// is undefined.
/// /// `self` is shuffled **in place** to operate the desired partition: /// no copy of the array is allocated. /// /// The method uses Hoare's partition algorithm. /// Complexity: O(`n`), where `n` is the number of elements in the array. /// Average number of element swaps: n/6 - 1/3 (see /// [link](https://cs.stackexchange.com/questions/11458/quicksort-partitioning-hoare-vs-lomuto/11550)) /// /// **Panics** if `pivot_index` is greater than or equal to `n`. /// /// # Example /// /// ``` /// use ndarray::array; /// use ndarray_stats::Sort1dExt; /// /// let mut data = array![3, 1, 4, 5, 2]; /// let pivot_index = 2; /// let pivot_value = data[pivot_index]; /// /// // Partition by the value located at `pivot_index`. /// let new_index = data.partition_mut(pivot_index); /// // The pivot value is now located at `new_index`. /// assert_eq!(data[new_index], pivot_value); /// // Elements less than that value are moved to the left. /// for i in 0..new_index { /// assert!(data[i] < pivot_value); /// } /// // Elements greater than or equal to that value are moved to the right. /// for i in (new_index + 1)..data.len() { /// assert!(data[i] >= pivot_value); /// } /// ``` fn partition_mut(&mut self, pivot_index: usize) -> usize where A: Ord + Clone; private_decl! 
{}
}

impl<A> Sort1dExt<A> for ArrayRef<A, Ix1> {
    /// Quickselect: partitions around a random pivot and recurses into the
    /// side that contains position `i`.
    ///
    /// **Panics** if `i` is greater than or equal to the length of the array
    /// (this includes every call on an empty array).
    fn get_from_sorted_mut(&mut self, i: usize) -> A
    where
        A: Ord + Clone,
    {
        let n = self.len();
        // Enforce the documented contract up front. Without this check a
        // one-element array silently returned its only element for any `i`,
        // and other out-of-range requests failed deep inside the recursion.
        assert!(
            i < n,
            "index {} is out of bounds for an array of length {}",
            i,
            n
        );
        if n == 1 {
            return self[0].clone();
        }

        let mut rng = thread_rng();
        let pivot_index = rng.gen_range(0..n);
        let partition_index = self.partition_mut(pivot_index);
        if i < partition_index {
            // The target lies among the elements strictly smaller than the pivot.
            self.slice_axis_mut(Axis(0), Slice::from(..partition_index))
                .get_from_sorted_mut(i)
        } else if i == partition_index {
            // The pivot landed exactly on the requested position.
            self[i].clone()
        } else {
            // The target lies to the right of the pivot; re-base the index
            // onto the sub-slice that starts just after the pivot.
            self.slice_axis_mut(Axis(0), Slice::from(partition_index + 1..))
                .get_from_sorted_mut(i - (partition_index + 1))
        }
    }

    /// Sorts and deduplicates `indexes`, then delegates to
    /// `get_many_from_sorted_mut_unchecked`.
    fn get_many_from_sorted_mut(&mut self, indexes: &ArrayRef1<usize>) -> IndexMap<usize, A>
    where
        A: Ord + Clone,
    {
        let mut deduped_indexes: Vec<usize> = indexes.to_vec();
        deduped_indexes.sort_unstable();
        deduped_indexes.dedup();

        get_many_from_sorted_mut_unchecked(self, &deduped_indexes)
    }

    /// Partitions the array around the value initially at `pivot_index`:
    /// strictly smaller elements end up to its left, the remaining ones to
    /// its right. Returns the pivot's final index.
    ///
    /// **Panics** if `pivot_index` is out of bounds.
    fn partition_mut(&mut self, pivot_index: usize) -> usize
    where
        A: Ord + Clone,
    {
        let pivot_value = self[pivot_index].clone();
        // Park the pivot at the front; it is swapped into its final slot at
        // the very end.
        self.swap(pivot_index, 0);
        let n = self.len();
        let mut i = 1;
        let mut j = n - 1;
        loop {
            // Advance `i` to the first element that is not smaller than the
            // pivot, without running past `j`.
            while i <= j && self[i] < pivot_value {
                i += 1;
            }
            // Move `j` left over elements that are not smaller than the
            // pivot. The guard is `j <= 1` rather than `j == 1`: on a
            // one-element array `j` starts at 0, the pivot compares `<=`
            // itself, and decrementing would underflow `usize`.
            while pivot_value <= self[j] {
                if j <= 1 {
                    break;
                }
                j -= 1;
            }
            if i >= j {
                break;
            }
            self.swap(i, j);
            i += 1;
            j -= 1;
        }
        // `i - 1` is the last position holding an element smaller than the
        // pivot (or 0 if there is none): that is where the pivot belongs.
        self.swap(0, i - 1);
        i - 1
    }

    private_impl! {}
}

/// To retrieve multiple indexes from the sorted array in an optimized fashion,
/// [get_many_from_sorted_mut] first of all sorts and deduplicates the
/// `indexes` vector.
///
/// `get_many_from_sorted_mut_unchecked` does not perform this sorting and
/// deduplication, assuming that the user has already taken care of it.
///
/// Useful when you have to call [get_many_from_sorted_mut] multiple times
/// using the same indexes.
/// /// [get_many_from_sorted_mut]: ../trait.Sort1dExt.html#tymethod.get_many_from_sorted_mut pub(crate) fn get_many_from_sorted_mut_unchecked( array: &mut ArrayRef1, indexes: &[usize], ) -> IndexMap where A: Ord + Clone, { if indexes.is_empty() { return IndexMap::new(); } // Since `!indexes.is_empty()` and indexes must be in-bounds, `array` must // be non-empty. let mut values = vec![array[0].clone(); indexes.len()]; _get_many_from_sorted_mut_unchecked(array.view_mut(), &mut indexes.to_owned(), &mut values); // We convert the vector to a more search-friendly `IndexMap`. indexes.iter().cloned().zip(values.into_iter()).collect() } /// This is the recursive portion of `get_many_from_sorted_mut_unchecked`. /// /// `indexes` is the list of indexes to get. `indexes` is mutable so that it /// can be used as scratch space for this routine; the value of `indexes` after /// calling this routine should be ignored. /// /// `values` is a pre-allocated slice to use for writing the output. Its /// initial element values are ignored. fn _get_many_from_sorted_mut_unchecked( mut array: ArrayViewMut1<'_, A>, indexes: &mut [usize], values: &mut [A], ) where A: Ord + Clone, { let n = array.len(); debug_assert!(n >= indexes.len()); // because indexes must be unique and in-bounds debug_assert_eq!(indexes.len(), values.len()); if indexes.is_empty() { // Nothing to do in this case. return; } // At this point, `n >= 1` since `indexes.len() >= 1`. if n == 1 { // We can only reach this point if `indexes.len() == 1`, so we only // need to assign the single value, and then we're done. debug_assert_eq!(indexes.len(), 1); values[0] = array[0].clone(); return; } // We pick a random pivot index: the corresponding element is the pivot value let mut rng = thread_rng(); let pivot_index = rng.gen_range(0..n); // We partition the array with respect to the pivot value. // The pivot value moves to `array_partition_index`. 
// Elements strictly smaller than the pivot value have indexes < `array_partition_index`. // Elements greater or equal to the pivot value have indexes > `array_partition_index`. let array_partition_index = array.partition_mut(pivot_index); // We use a divide-and-conquer strategy, splitting the indexes we are // searching for (`indexes`) and the corresponding portions of the output // slice (`values`) into pieces with respect to `array_partition_index`. let (found_exact, index_split) = match indexes.binary_search(&array_partition_index) { Ok(index) => (true, index), Err(index) => (false, index), }; let (smaller_indexes, other_indexes) = indexes.split_at_mut(index_split); let (smaller_values, other_values) = values.split_at_mut(index_split); let (bigger_indexes, bigger_values) = if found_exact { other_values[0] = array[array_partition_index].clone(); // Write exactly found value. (&mut other_indexes[1..], &mut other_values[1..]) } else { (other_indexes, other_values) }; // We search recursively for the values corresponding to strictly smaller // indexes to the left of `partition_index`. _get_many_from_sorted_mut_unchecked( array.slice_axis_mut(Axis(0), Slice::from(..array_partition_index)), smaller_indexes, smaller_values, ); // We search recursively for the values corresponding to strictly bigger // indexes to the right of `partition_index`. Since only the right portion // of the array is passed in, the indexes need to be shifted by length of // the removed portion. 
bigger_indexes .iter_mut() .for_each(|x| *x -= array_partition_index + 1); _get_many_from_sorted_mut_unchecked( array.slice_axis_mut(Axis(0), Slice::from(array_partition_index + 1..)), bigger_indexes, bigger_values, ); } ================================================ FILE: src/summary_statistics/means.rs ================================================ use super::SummaryStatisticsExt; use crate::errors::{EmptyInput, MultiInputError, ShapeMismatch}; use ndarray::{Array, ArrayBase, ArrayRef, Axis, Data, Dimension, Ix1, RemoveAxis}; use num_integer::IterBinomial; use num_traits::{Float, FromPrimitive, Zero}; use std::ops::{Add, AddAssign, Div, Mul}; impl SummaryStatisticsExt for ArrayRef where D: Dimension, { fn mean(&self) -> Result where A: Clone + FromPrimitive + Add + Div + Zero, { let n_elements = self.len(); if n_elements == 0 { Err(EmptyInput) } else { let n_elements = A::from_usize(n_elements) .expect("Converting number of elements to `A` must not fail."); Ok(self.sum() / n_elements) } } fn weighted_mean(&self, weights: &Self) -> Result where A: Copy + Div + Mul + Zero, { return_err_if_empty!(self); let weighted_sum = self.weighted_sum(weights)?; Ok(weighted_sum / weights.sum()) } fn weighted_sum(&self, weights: &ArrayRef) -> Result where A: Copy + Mul + Zero, { return_err_unless_same_shape!(self, weights); Ok(self .iter() .zip(weights) .fold(A::zero(), |acc, (&d, &w)| acc + d * w)) } fn weighted_mean_axis( &self, axis: Axis, weights: &ArrayRef, ) -> Result, MultiInputError> where A: Copy + Div + Mul + Zero, D: RemoveAxis, { return_err_if_empty!(self); let mut weighted_sum = self.weighted_sum_axis(axis, weights)?; let weights_sum = weights.sum(); weighted_sum.mapv_inplace(|v| v / weights_sum); Ok(weighted_sum) } fn weighted_sum_axis( &self, axis: Axis, weights: &ArrayRef, ) -> Result, MultiInputError> where A: Copy + Mul + Zero, D: RemoveAxis, { if self.shape()[axis.index()] != weights.len() { return Err(MultiInputError::ShapeMismatch(ShapeMismatch { 
first_shape: self.shape().to_vec(), second_shape: weights.shape().to_vec(), })); } // We could use `lane.weighted_sum` here, but we're avoiding 2 // conditions and an unwrap per lane. Ok(self.map_axis(axis, |lane| { lane.iter() .zip(weights) .fold(A::zero(), |acc, (&d, &w)| acc + d * w) })) } fn harmonic_mean(&self) -> Result where A: Float + FromPrimitive, { self.map(|x| x.recip()) .mean() .map(|x| x.recip()) .ok_or(EmptyInput) } fn geometric_mean(&self) -> Result where A: Float + FromPrimitive, { self.map(|x| x.ln()) .mean() .map(|x| x.exp()) .ok_or(EmptyInput) } fn weighted_var(&self, weights: &Self, ddof: A) -> Result where A: AddAssign + Float + FromPrimitive, { return_err_if_empty!(self); return_err_unless_same_shape!(self, weights); let zero = A::from_usize(0).expect("Converting 0 to `A` must not fail."); let one = A::from_usize(1).expect("Converting 1 to `A` must not fail."); assert!( !(ddof < zero || ddof > one), "`ddof` must not be less than zero or greater than one", ); inner_weighted_var(self, weights, ddof, zero) } fn weighted_std(&self, weights: &Self, ddof: A) -> Result where A: AddAssign + Float + FromPrimitive, { Ok(self.weighted_var(weights, ddof)?.sqrt()) } fn weighted_var_axis( &self, axis: Axis, weights: &ArrayRef, ddof: A, ) -> Result, MultiInputError> where A: AddAssign + Float + FromPrimitive, D: RemoveAxis, { return_err_if_empty!(self); if self.shape()[axis.index()] != weights.len() { return Err(MultiInputError::ShapeMismatch(ShapeMismatch { first_shape: self.shape().to_vec(), second_shape: weights.shape().to_vec(), })); } let zero = A::from_usize(0).expect("Converting 0 to `A` must not fail."); let one = A::from_usize(1).expect("Converting 1 to `A` must not fail."); assert!( !(ddof < zero || ddof > one), "`ddof` must not be less than zero or greater than one", ); // `weights` must be a view because `lane` is a view in this context. 
let weights = weights.view();
        Ok(self.map_axis(axis, |lane| {
            // `inner_weighted_var` in this file only ever returns `Ok`, so
            // this `unwrap` cannot panic.
            inner_weighted_var(&lane, &weights, ddof, zero).unwrap()
        }))
    }

    fn weighted_std_axis(
        &self,
        axis: Axis,
        weights: &ArrayRef,
        ddof: A,
    ) -> Result, MultiInputError>
    where
        A: AddAssign + Float + FromPrimitive,
        D: RemoveAxis,
    {
        // Element-wise square root of the weighted variance along `axis`.
        Ok(self
            .weighted_var_axis(axis, weights, ddof)?
            .mapv_into(|x| x.sqrt()))
    }

    fn kurtosis(&self) -> Result
    where
        A: Float + FromPrimitive,
    {
        // Fourth central moment over the squared second central moment (μ₄ / μ₂²).
        let central_moments = self.central_moments(4)?;
        Ok(central_moments[4] / central_moments[2].powi(2))
    }

    fn skewness(&self) -> Result
    where
        A: Float + FromPrimitive,
    {
        // Third central moment over the cubed square root of the second (μ₃ / (√μ₂)³).
        let central_moments = self.central_moments(3)?;
        Ok(central_moments[3] / central_moments[2].sqrt().powi(3))
    }

    fn central_moment(&self, order: u16) -> Result
    where
        A: Float + FromPrimitive,
    {
        if self.is_empty() {
            return Err(EmptyInput);
        }
        match order {
            // Orders 0 and 1 are returned as constants without reading the data.
            0 => Ok(A::one()),
            1 => Ok(A::zero()),
            n => {
                // NOTE(review): relies on `mean()` succeeding for a non-empty
                // array (emptiness is rejected above) — confirm.
                let mean = self.mean().unwrap();
                // Shift the data by its mean, then correct for the residual
                // mean of the shifted data (`shifted_moments[1]`).
                let shifted_array = self.mapv(|x| x - mean);
                let shifted_moments = moments(shifted_array, n);
                let correction_term = -shifted_moments[1];

                // Evaluate the polynomial in the correction term.
                let coefficients = central_moment_coefficients(&shifted_moments);
                Ok(horner_method(coefficients, correction_term))
            }
        }
    }

    fn central_moments(&self, order: u16) -> Result, EmptyInput>
    where
        A: Float + FromPrimitive,
    {
        if self.is_empty() {
            return Err(EmptyInput);
        }
        match order {
            // Orders 0 and 1 are returned as constants without reading the data.
            0 => Ok(vec![A::one()]),
            1 => Ok(vec![A::one(), A::zero()]),
            n => {
                // We only perform these operations once, and then reuse their
                // result to compute all the required moments
                let mean = self.mean().unwrap();
                let shifted_array = self.mapv(|x| x - mean);
                let shifted_moments = moments(shifted_array, n);
                let correction_term = -shifted_moments[1];

                // The result is indexed by order: entries 0 and 1 first, then
                // orders 2..=n appended in increasing order.
                let mut central_moments = vec![A::one(), A::zero()];
                for k in 2..=n {
                    // Only the shifted moments of order 0..=k are passed for order k.
                    let coefficients =
                        central_moment_coefficients(&shifted_moments[..=(k as usize)]);
                    let central_moment = horner_method(coefficients, correction_term);
                    central_moments.push(central_moment)
                }
                Ok(central_moments)
            }
        }
    }

    private_impl!
{}
}

/// Private function for `weighted_var` without conditions and asserts.
///
/// Makes a single pass over the paired elements of `arr` and `weights`,
/// keeping a running weight total, a running weighted mean and a running sum
/// `s` of weighted squared deviations, and returns `s / (weight_sum - ddof)`.
/// `zero` is the caller-supplied zero value of `A` used to initialise the
/// accumulators.
///
/// This function never returns `Err`; callers are expected to have validated
/// the inputs beforehand.
fn inner_weighted_var<A, D>(
    arr: &ArrayRef<A, D>,
    weights: &ArrayRef<A, D>,
    ddof: A,
    zero: A,
) -> Result<A, MultiInputError>
where
    A: AddAssign + Float + FromPrimitive,
    D: Dimension,
{
    let mut weight_sum = zero;
    let mut mean = zero;
    let mut s = zero;
    for (&x, &w) in arr.iter().zip(weights.iter()) {
        weight_sum += w;
        // Deviation from the mean *before* this element is folded in ...
        let x_minus_mean = x - mean;
        mean += (w / weight_sum) * x_minus_mean;
        // ... multiplied by the deviation from the *updated* mean.
        s += w * x_minus_mean * (x - mean);
    }
    Ok(s / (weight_sum - ddof))
}

/// Returns a vector containing all moments of the array elements up to
/// *order*, where the *p*-th moment is defined as:
///
/// ```text
/// 1  n
/// ―  ∑  xᵢᵖ
/// n i=1
/// ```
///
/// The returned moments are ordered by power magnitude: 0th moment, 1st moment, etc.
///
/// **Panics** if `A::from_usize()` fails to convert the number of elements in the array.
fn moments<A, S, D>(a: ArrayBase<S, D>, order: u16) -> Vec<A>
where
    A: Float + FromPrimitive,
    S: Data<Elem = A>,
    D: Dimension,
{
    let n_elements =
        A::from_usize(a.len()).expect("Converting number of elements to `A` must not fail");
    let order = i32::from(order);

    // When k=0, we are raising each element to the 0th power
    // No need to waste CPU cycles going through the array
    let mut moments = vec![A::one()];

    if order >= 1 {
        // When k=1, we don't need to raise elements to the 1th power (identity)
        moments.push(a.sum() / n_elements)
    }

    for k in 2..=order {
        moments.push(a.map(|x| x.powi(k)).sum() / n_elements)
    }
    moments
}

/// Returns the coefficients in the polynomial expression to compute the *p*th
/// central moment as a function of the sample mean.
///
/// It takes as input all moments up to order *p*, ordered by power magnitude
/// (0th moment first), so *p* is inferred as the length of the *moments*
/// slice minus one. The *k*-th returned coefficient is
/// `C(p, k) * moments[p - k]`, where `C(p, k)` is the binomial coefficient.
///
/// An empty slice yields an empty vector.
///
/// **Panics** if `A::from_usize()` fails to convert a binomial coefficient.
fn central_moment_coefficients<A>(moments: &[A]) -> Vec<A>
where
    A: Float + FromPrimitive,
{
    // `moments` holds orders 0..=p, i.e. p + 1 values, so the binomial row to
    // pair with it is C(p, 0..=p). Using `moments.len()` directly would select
    // the row for p + 1 and give wrong coefficients for every k >= 1.
    let order = moments.len().saturating_sub(1);
    IterBinomial::new(order)
        .zip(moments.iter().rev())
        .map(|(binom, &moment)| A::from_usize(binom).unwrap() * moment)
        .collect()
}

/// Uses [Horner's method] to evaluate a polynomial with a single indeterminate.
///
/// Coefficients are expected to be sorted by ascending order
/// with respect to the indeterminate's exponent.
///
/// If the array is empty, `A::zero()` is returned.
///
/// Horner's method can evaluate a polynomial of order *n* with a single indeterminate
/// using only *n-1* multiplications and *n-1* sums - in terms of number of operations,
/// this is an optimal algorithm for polynomial evaluation.
///
/// [Horner's method]: https://en.wikipedia.org/wiki/Horner%27s_method
fn horner_method<A>(coefficients: Vec<A>, indeterminate: A) -> A
where
    A: Float,
{
    let mut result = A::zero();
    // Fold from the highest-order coefficient down to the constant term.
    for coefficient in coefficients.into_iter().rev() {
        result = coefficient + indeterminate * result
    }
    result
}

================================================
FILE: src/summary_statistics/mod.rs
================================================
//! Summary statistics (e.g. mean, variance, etc.).
use crate::errors::{EmptyInput, MultiInputError};
use ndarray::{Array, ArrayRef, Axis, Dimension, Ix1, RemoveAxis};
use num_traits::{Float, FromPrimitive, Zero};
use std::ops::{Add, AddAssign, Div, Mul};

/// Extension trait for `ArrayRef` providing methods
/// to compute several summary statistics (e.g. mean, variance, etc.).
pub trait SummaryStatisticsExt
where
    D: Dimension,
{
    /// Returns the [`arithmetic mean`] x̅ of all elements in the array:
    ///
    /// ```text
    ///     1  n
    /// x̅ = ―  ∑ xᵢ
    ///     n i=1
    /// ```
    ///
    /// If the array is empty, `Err(EmptyInput)` is returned.
    ///
    /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array.
///
    /// [`arithmetic mean`]: https://en.wikipedia.org/wiki/Arithmetic_mean
    fn mean(&self) -> Result
    where
        A: Clone + FromPrimitive + Add + Div + Zero;

    /// Returns the [`arithmetic weighted mean`] x̅ of all elements in the array. Use `weighted_sum`
    /// if the `weights` are normalized (they sum up to 1.0).
    ///
    /// ```text
    ///       n
    ///       ∑ wᵢxᵢ
    ///      i=1
    /// x̅ = ―――――――――
    ///        n
    ///        ∑ wᵢ
    ///       i=1
    /// ```
    ///
    /// **Panics** if division by zero panics for type A.
    ///
    /// The following **errors** may be returned:
    ///
    /// * `MultiInputError::EmptyInput` if `self` is empty
    /// * `MultiInputError::ShapeMismatch` if `self` and `weights` don't have the same shape
    ///
    /// [`arithmetic weighted mean`]: https://en.wikipedia.org/wiki/Weighted_arithmetic_mean
    fn weighted_mean(&self, weights: &Self) -> Result
    where
        A: Copy + Div + Mul + Zero;

    /// Returns the weighted sum of all elements in the array, that is, the dot product of the
    /// arrays `self` and `weights`. Equivalent to `weighted_mean` if the `weights` are normalized.
    ///
    /// ```text
    ///      n
    /// x̅ =  ∑ wᵢxᵢ
    ///     i=1
    /// ```
    ///
    /// The following **errors** may be returned:
    ///
    /// * `MultiInputError::ShapeMismatch` if `self` and `weights` don't have the same shape
    fn weighted_sum(&self, weights: &Self) -> Result
    where
        A: Copy + Mul + Zero;

    /// Returns the [`arithmetic weighted mean`] x̅ along `axis`. Use `weighted_sum_axis` if the
    /// `weights` are normalized.
    ///
    /// ```text
    ///       n
    ///       ∑ wᵢxᵢ
    ///      i=1
    /// x̅ = ―――――――――
    ///        n
    ///        ∑ wᵢ
    ///       i=1
    /// ```
    ///
    /// **Panics** if `axis` is out of bounds.
///
    /// The following **errors** may be returned:
    ///
    /// * `MultiInputError::EmptyInput` if `self` is empty
    /// * `MultiInputError::ShapeMismatch` if `self` length along axis is not equal to `weights` length
    ///
    /// [`arithmetic weighted mean`]: https://en.wikipedia.org/wiki/Weighted_arithmetic_mean
    fn weighted_mean_axis(
        &self,
        axis: Axis,
        weights: &ArrayRef,
    ) -> Result, MultiInputError>
    where
        A: Copy + Div + Mul + Zero,
        D: RemoveAxis;

    /// Returns the weighted sum along `axis`, that is, the dot product of `weights` and each lane
    /// of `self` along `axis`. Equivalent to `weighted_mean_axis` if the `weights` are normalized.
    ///
    /// ```text
    ///      n
    /// x̅ =  ∑ wᵢxᵢ
    ///     i=1
    /// ```
    ///
    /// **Panics** if `axis` is out of bounds.
    ///
    /// The following **errors** may be returned:
    ///
    /// * `MultiInputError::ShapeMismatch` if `self` length along axis is not equal to `weights` length
    fn weighted_sum_axis(
        &self,
        axis: Axis,
        weights: &ArrayRef,
    ) -> Result, MultiInputError>
    where
        A: Copy + Mul + Zero,
        D: RemoveAxis;

    /// Returns the [`harmonic mean`] `HM(X)` of all elements in the array:
    ///
    /// ```text
    ///           ⎛ n     ⎞⁻¹
    /// HM(X) = n ⎜ ∑ xᵢ⁻¹⎟
    ///           ⎝i=1    ⎠
    /// ```
    ///
    /// If the array is empty, `Err(EmptyInput)` is returned.
    ///
    /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array.
    ///
    /// [`harmonic mean`]: https://en.wikipedia.org/wiki/Harmonic_mean
    fn harmonic_mean(&self) -> Result
    where
        A: Float + FromPrimitive;

    /// Returns the [`geometric mean`] `GM(X)` of all elements in the array:
    ///
    /// ```text
    ///         ⎛ n   ⎞¹⁄ₙ
    /// GM(X) = ⎜ ∏ xᵢ⎟
    ///         ⎝i=1  ⎠
    /// ```
    ///
    /// If the array is empty, `Err(EmptyInput)` is returned.
    ///
    /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array.
    ///
    /// [`geometric mean`]: https://en.wikipedia.org/wiki/Geometric_mean
    fn geometric_mean(&self) -> Result
    where
        A: Float + FromPrimitive;

    /// Returns the weighted variance of all elements in the array.
///
    /// The weighted variance is computed using the [`West, D. H. D.`] incremental algorithm.
    /// With all `weights` equal to one this is the unweighted variance (`var`) for the same `ddof`.
    ///
    /// The parameter `ddof` specifies the "delta degrees of freedom". For example, to calculate the
    /// population variance, use `ddof = 0`, or to calculate the sample variance, use `ddof = 1`.
    ///
    /// **Panics** if `ddof` is less than zero or greater than one,
    /// or if `A::from_usize()` fails for zero or one.
    ///
    /// The following **errors** may be returned:
    ///
    /// * `MultiInputError::EmptyInput` if `self` is empty
    /// * `MultiInputError::ShapeMismatch` if `self` and `weights` don't have the same shape
    ///
    /// [`West, D. H. D.`]: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    fn weighted_var(&self, weights: &Self, ddof: A) -> Result
    where
        A: AddAssign + Float + FromPrimitive;

    /// Returns the weighted standard deviation of all elements in the array.
    ///
    /// The weighted standard deviation is computed using the [`West, D. H. D.`] incremental
    /// algorithm. With all `weights` equal to one this is the unweighted standard deviation
    /// (`std`) for the same `ddof`.
    ///
    /// The parameter `ddof` specifies the "delta degrees of freedom". For example, to calculate the
    /// population standard deviation, use `ddof = 0`, or to calculate the sample standard
    /// deviation, use `ddof = 1`.
    ///
    /// **Panics** if `ddof` is less than zero or greater than one,
    /// or if `A::from_usize()` fails for zero or one.
    ///
    /// The following **errors** may be returned:
    ///
    /// * `MultiInputError::EmptyInput` if `self` is empty
    /// * `MultiInputError::ShapeMismatch` if `self` and `weights` don't have the same shape
    ///
    /// [`West, D. H. D.`]: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    fn weighted_std(&self, weights: &Self, ddof: A) -> Result
    where
        A: AddAssign + Float + FromPrimitive;

    /// Returns the weighted variance along `axis`.
    ///
    /// The weighted variance is computed using the [`West, D. H. D.`] incremental algorithm.
    /// With all `weights` equal to one this is the unweighted variance along `axis` (`var_axis`)
    /// for the same `ddof`.
    ///
    /// The parameter `ddof` specifies the "delta degrees of freedom". For example, to calculate the
    /// population variance, use `ddof = 0`, or to calculate the sample variance, use `ddof = 1`.
///
    /// **Panics** if `ddof` is less than zero or greater than one, or if `axis` is out of bounds,
    /// or if `A::from_usize()` fails for zero or one.
    ///
    /// The following **errors** may be returned:
    ///
    /// * `MultiInputError::EmptyInput` if `self` is empty
    /// * `MultiInputError::ShapeMismatch` if `self` length along axis is not equal to `weights` length
    ///
    /// [`West, D. H. D.`]: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    fn weighted_var_axis(
        &self,
        axis: Axis,
        weights: &ArrayRef,
        ddof: A,
    ) -> Result, MultiInputError>
    where
        A: AddAssign + Float + FromPrimitive,
        D: RemoveAxis;

    /// Returns the weighted standard deviation along `axis`.
    ///
    /// The weighted standard deviation is computed using the [`West, D. H. D.`] incremental
    /// algorithm. With all `weights` equal to one this is the unweighted standard deviation
    /// along `axis` (`std_axis`) for the same `ddof`.
    ///
    /// The parameter `ddof` specifies the "delta degrees of freedom". For example, to calculate the
    /// population standard deviation, use `ddof = 0`, or to calculate the sample standard
    /// deviation, use `ddof = 1`.
    ///
    /// **Panics** if `ddof` is less than zero or greater than one, or if `axis` is out of bounds,
    /// or if `A::from_usize()` fails for zero or one.
    ///
    /// The following **errors** may be returned:
    ///
    /// * `MultiInputError::EmptyInput` if `self` is empty
    /// * `MultiInputError::ShapeMismatch` if `self` length along axis is not equal to `weights` length
    ///
    /// [`West, D. H. D.`]: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    fn weighted_std_axis(
        &self,
        axis: Axis,
        weights: &ArrayRef,
        ddof: A,
    ) -> Result, MultiInputError>
    where
        A: AddAssign + Float + FromPrimitive,
        D: RemoveAxis;

    /// Returns the [kurtosis] `Kurt[X]` of all elements in the array:
    ///
    /// ```text
    /// Kurt[X] = μ₄ / σ⁴
    /// ```
    ///
    /// where μ₄ is the fourth central moment and σ is the standard deviation of
    /// the elements in the array.
    ///
    /// This is sometimes referred to as _Pearson's kurtosis_. Fisher's kurtosis can be
    /// computed by subtracting 3 from Pearson's kurtosis.
    ///
    /// If the array is empty, `Err(EmptyInput)` is returned.
    ///
    /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array.
///
    /// [kurtosis]: https://en.wikipedia.org/wiki/Kurtosis
    fn kurtosis(&self) -> Result
    where
        A: Float + FromPrimitive;

    /// Returns the [Pearson's moment coefficient of skewness] γ₁ of all elements in the array:
    ///
    /// ```text
    /// γ₁ = μ₃ / σ³
    /// ```
    ///
    /// where μ₃ is the third central moment and σ is the standard deviation of
    /// the elements in the array.
    ///
    /// If the array is empty, `Err(EmptyInput)` is returned.
    ///
    /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array.
    ///
    /// [Pearson's moment coefficient of skewness]: https://en.wikipedia.org/wiki/Skewness
    fn skewness(&self) -> Result
    where
        A: Float + FromPrimitive;

    /// Returns the *p*-th [central moment] of all elements in the array, μₚ, where *p* is
    /// `order`:
    ///
    /// ```text
    ///      1  n
    /// μₚ = ―  ∑ (xᵢ-x̅)ᵖ
    ///      n i=1
    /// ```
    ///
    /// If the array is empty, `Err(EmptyInput)` is returned.
    ///
    /// The *p*-th central moment is computed using a corrected two-pass algorithm (see Section 3.5
    /// in [Pébay et al., 2016]). Complexity is *O(np)* when *n >> p*, *p > 1*.
    ///
    /// **Panics** if `A::from_usize()` fails to convert the number of elements
    /// in the array or a binomial coefficient.
    ///
    /// [central moment]: https://en.wikipedia.org/wiki/Central_moment
    /// [Pébay et al., 2016]: https://www.osti.gov/pages/servlets/purl/1427275
    fn central_moment(&self, order: u16) -> Result
    where
        A: Float + FromPrimitive;

    /// Returns the [central moments] of order 0 up to and including *p* (`order`) of all
    /// elements in the array, see [central moment] for more details. The returned vector is
    /// indexed by order: element *k* is the *k*-th central moment.
    ///
    /// If the array is empty, `Err(EmptyInput)` is returned.
    ///
    /// This method reuses the intermediate steps for the *k*-th moment to compute the *(k+1)*-th,
    /// being thus more efficient than repeated calls to [central moment] if the computation
    /// of central moments of multiple orders is required.
    ///
    /// **Panics** if `A::from_usize()` fails to convert the number of elements
    /// in the array or a binomial coefficient.
/// /// [central moments]: https://en.wikipedia.org/wiki/Central_moment /// [central moment]: #tymethod.central_moment fn central_moments(&self, order: u16) -> Result, EmptyInput> where A: Float + FromPrimitive; private_decl! {} } mod means; ================================================ FILE: tests/deviation.rs ================================================ use ndarray_stats::errors::{MultiInputError, ShapeMismatch}; use ndarray_stats::DeviationExt; use approx::assert_abs_diff_eq; use ndarray::{array, Array1}; use num_bigint::BigInt; use num_traits::Float; use std::f64; #[test] fn test_count_eq() -> Result<(), MultiInputError> { let a = array![0., 0.]; let b = array![1., 0.]; let c = array![0., 1.]; let d = array![1., 1.]; assert_eq!(a.count_eq(&a)?, 2); assert_eq!(a.count_eq(&b)?, 1); assert_eq!(a.count_eq(&c)?, 1); assert_eq!(a.count_eq(&d)?, 0); Ok(()) } #[test] fn test_count_neq() -> Result<(), MultiInputError> { let a = array![0., 0.]; let b = array![1., 0.]; let c = array![0., 1.]; let d = array![1., 1.]; assert_eq!(a.count_neq(&a)?, 0); assert_eq!(a.count_neq(&b)?, 1); assert_eq!(a.count_neq(&c)?, 1); assert_eq!(a.count_neq(&d)?, 2); Ok(()) } #[test] fn test_sq_l2_dist() -> Result<(), MultiInputError> { let a = array![0., 1., 4., 2.]; let b = array![1., 1., 2., 4.]; assert_eq!(a.sq_l2_dist(&b)?, 9.); Ok(()) } #[test] fn test_l2_dist() -> Result<(), MultiInputError> { let a = array![0., 1., 4., 2.]; let b = array![1., 1., 2., 4.]; assert_eq!(a.l2_dist(&b)?, 3.); Ok(()) } #[test] fn test_l1_dist() -> Result<(), MultiInputError> { let a = array![0., 1., 4., 2.]; let b = array![1., 1., 2., 4.]; assert_eq!(a.l1_dist(&b)?, 5.); Ok(()) } #[test] fn test_linf_dist() -> Result<(), MultiInputError> { let a = array![0., 0.]; let b = array![1., 0.]; let c = array![1., 2.]; assert_eq!(a.linf_dist(&a)?, 0.); assert_eq!(a.linf_dist(&b)?, 1.); assert_eq!(b.linf_dist(&a)?, 1.); assert_eq!(a.linf_dist(&c)?, 2.); assert_eq!(c.linf_dist(&a)?, 2.); Ok(()) } #[test] fn 
test_mean_abs_err() -> Result<(), MultiInputError> { let a = array![1., 1.]; let b = array![3., 5.]; assert_eq!(a.mean_abs_err(&a)?, 0.); assert_eq!(a.mean_abs_err(&b)?, 3.); assert_eq!(b.mean_abs_err(&a)?, 3.); Ok(()) } #[test] fn test_mean_sq_err() -> Result<(), MultiInputError> { let a = array![1., 1.]; let b = array![3., 5.]; assert_eq!(a.mean_sq_err(&a)?, 0.); assert_eq!(a.mean_sq_err(&b)?, 10.); assert_eq!(b.mean_sq_err(&a)?, 10.); Ok(()) } #[test] fn test_root_mean_sq_err() -> Result<(), MultiInputError> { let a = array![1., 1.]; let b = array![3., 5.]; assert_eq!(a.root_mean_sq_err(&a)?, 0.); assert_abs_diff_eq!(a.root_mean_sq_err(&b)?, 10.0.sqrt()); assert_abs_diff_eq!(b.root_mean_sq_err(&a)?, 10.0.sqrt()); Ok(()) } #[test] fn test_peak_signal_to_noise_ratio() -> Result<(), MultiInputError> { let a = array![1., 1.]; assert!(a.peak_signal_to_noise_ratio(&a, 1.)?.is_infinite()); let a = array![1., 2., 3., 4., 5., 6., 7.]; let b = array![1., 3., 3., 4., 6., 7., 8.]; let maxv = 8.; let expected = 20. * Float::log10(maxv) - 10. 
* Float::log10(a.mean_sq_err(&b)?); let actual = a.peak_signal_to_noise_ratio(&b, maxv)?; assert_abs_diff_eq!(actual, expected); Ok(()) } #[test] fn test_deviations_with_n_by_m_ints() -> Result<(), MultiInputError> { let a = array![[0, 1], [4, 2]]; let b = array![[1, 1], [2, 4]]; assert_eq!(a.count_eq(&a)?, 4); assert_eq!(a.count_neq(&a)?, 0); assert_eq!(a.sq_l2_dist(&b)?, 9); assert_eq!(a.l2_dist(&b)?, 3.); assert_eq!(a.l1_dist(&b)?, 5); assert_eq!(a.linf_dist(&b)?, 2); assert_abs_diff_eq!(a.mean_abs_err(&b)?, 1.25); assert_abs_diff_eq!(a.mean_sq_err(&b)?, 2.25); assert_abs_diff_eq!(a.root_mean_sq_err(&b)?, 1.5); assert_abs_diff_eq!(a.peak_signal_to_noise_ratio(&b, 4)?, 8.519374645445623); Ok(()) } #[test] fn test_deviations_with_empty_receiver() { let a: Array1 = array![]; let b: Array1 = array![1.]; assert_eq!(a.count_eq(&b), Err(MultiInputError::EmptyInput)); assert_eq!(a.count_neq(&b), Err(MultiInputError::EmptyInput)); assert_eq!(a.sq_l2_dist(&b), Err(MultiInputError::EmptyInput)); assert_eq!(a.l2_dist(&b), Err(MultiInputError::EmptyInput)); assert_eq!(a.l1_dist(&b), Err(MultiInputError::EmptyInput)); assert_eq!(a.linf_dist(&b), Err(MultiInputError::EmptyInput)); assert_eq!(a.mean_abs_err(&b), Err(MultiInputError::EmptyInput)); assert_eq!(a.mean_sq_err(&b), Err(MultiInputError::EmptyInput)); assert_eq!(a.root_mean_sq_err(&b), Err(MultiInputError::EmptyInput)); assert_eq!( a.peak_signal_to_noise_ratio(&b, 0.), Err(MultiInputError::EmptyInput) ); } #[test] fn test_deviations_do_not_panic_if_nans() -> Result<(), MultiInputError> { let a: Array1 = array![1., f64::NAN, 3., f64::NAN]; let b: Array1 = array![1., f64::NAN, 3., 4.]; assert_eq!(a.count_eq(&b)?, 2); assert_eq!(a.count_neq(&b)?, 2); assert!(a.sq_l2_dist(&b)?.is_nan()); assert!(a.l2_dist(&b)?.is_nan()); assert!(a.l1_dist(&b)?.is_nan()); assert_eq!(a.linf_dist(&b)?, 0.); assert!(a.mean_abs_err(&b)?.is_nan()); assert!(a.mean_sq_err(&b)?.is_nan()); assert!(a.root_mean_sq_err(&b)?.is_nan()); 
assert!(a.peak_signal_to_noise_ratio(&b, 0.)?.is_nan()); Ok(()) } #[test] fn test_deviations_with_empty_argument() { let a: Array1 = array![1.]; let b: Array1 = array![]; let shape_mismatch_err = MultiInputError::ShapeMismatch(ShapeMismatch { first_shape: a.shape().to_vec(), second_shape: b.shape().to_vec(), }); let expected_err_usize = Err(shape_mismatch_err.clone()); let expected_err_f64 = Err(shape_mismatch_err); assert_eq!(a.count_eq(&b), expected_err_usize); assert_eq!(a.count_neq(&b), expected_err_usize); assert_eq!(a.sq_l2_dist(&b), expected_err_f64); assert_eq!(a.l2_dist(&b), expected_err_f64); assert_eq!(a.l1_dist(&b), expected_err_f64); assert_eq!(a.linf_dist(&b), expected_err_f64); assert_eq!(a.mean_abs_err(&b), expected_err_f64); assert_eq!(a.mean_sq_err(&b), expected_err_f64); assert_eq!(a.root_mean_sq_err(&b), expected_err_f64); assert_eq!(a.peak_signal_to_noise_ratio(&b, 0.), expected_err_f64); } #[test] fn test_deviations_with_non_copyable() -> Result<(), MultiInputError> { let a: Array1 = array![0.into(), 1.into(), 4.into(), 2.into()]; let b: Array1 = array![1.into(), 1.into(), 2.into(), 4.into()]; assert_eq!(a.count_eq(&a)?, 4); assert_eq!(a.count_neq(&a)?, 0); assert_eq!(a.sq_l2_dist(&b)?, 9.into()); assert_eq!(a.l2_dist(&b)?, 3.); assert_eq!(a.l1_dist(&b)?, 5.into()); assert_eq!(a.linf_dist(&b)?, 2.into()); assert_abs_diff_eq!(a.mean_abs_err(&b)?, 1.25); assert_abs_diff_eq!(a.mean_sq_err(&b)?, 2.25); assert_abs_diff_eq!(a.root_mean_sq_err(&b)?, 1.5); assert_abs_diff_eq!( a.peak_signal_to_noise_ratio(&b, 4.into())?, 8.519374645445623 ); Ok(()) } #[test] fn test_deviation_computation_for_mixed_ownership() { // It's enough to check that the code compiles! 
let a = array![0., 0.]; let b = array![1., 0.]; let _ = a.count_eq(&b.view()); let _ = a.count_neq(&b.view()); let _ = a.l2_dist(&b.view()); let _ = a.sq_l2_dist(&b.view()); let _ = a.l1_dist(&b.view()); let _ = a.linf_dist(&b.view()); let _ = a.mean_abs_err(&b.view()); let _ = a.mean_sq_err(&b.view()); let _ = a.root_mean_sq_err(&b.view()); let _ = a.peak_signal_to_noise_ratio(&b.view(), 10.); } ================================================ FILE: tests/maybe_nan.rs ================================================ use ndarray::prelude::*; use ndarray_stats::MaybeNan; use noisy_float::types::{n64, N64}; #[test] fn remove_nan_mut_nonstandard_layout() { fn eq_unordered(mut a: Vec, mut b: Vec) -> bool { a.sort(); b.sort(); a == b } let a = aview1(&[1., 2., f64::NAN, f64::NAN, 3., f64::NAN, 4., 5.]); { let mut a = a.to_owned(); let v = f64::remove_nan_mut(a.slice_mut(s![..;2])); assert!(eq_unordered(v.to_vec(), vec![n64(1.), n64(3.), n64(4.)])); } { let mut a = a.to_owned(); let v = f64::remove_nan_mut(a.slice_mut(s![..;-1])); assert!(eq_unordered( v.to_vec(), vec![n64(5.), n64(4.), n64(3.), n64(2.), n64(1.)], )); } { let mut a = a.to_owned(); let v = f64::remove_nan_mut(a.slice_mut(s![..;-2])); assert!(eq_unordered(v.to_vec(), vec![n64(5.), n64(2.)])); } } ================================================ FILE: tests/quantile.rs ================================================ use itertools::izip; use ndarray::array; use ndarray::prelude::*; use ndarray_stats::{ errors::{EmptyInput, MinMaxError, QuantileError}, interpolate::{Higher, Interpolate, Linear, Lower, Midpoint, Nearest}, Quantile1dExt, QuantileExt, }; use noisy_float::types::{n64, N64}; use quickcheck_macros::quickcheck; #[test] fn test_argmin() { let a = array![[1, 5, 3], [2, 0, 6]]; assert_eq!(a.argmin(), Ok((1, 1))); let a = array![[1., 5., 3.], [2., 0., 6.]]; assert_eq!(a.argmin(), Ok((1, 1))); let a = array![[1., 5., 3.], [2., ::std::f64::NAN, 6.]]; assert_eq!(a.argmin(), 
Err(MinMaxError::UndefinedOrder)); let a: Array2 = array![[], []]; assert_eq!(a.argmin(), Err(MinMaxError::EmptyInput)); } #[quickcheck] fn argmin_matches_min(data: Vec) -> bool { let a = Array1::from(data); a.argmin().map(|i| &a[i]) == a.min() } #[test] fn test_argmin_skipnan() { let a = array![[1., 5., 3.], [2., 0., 6.]]; assert_eq!(a.argmin_skipnan(), Ok((1, 1))); let a = array![[1., 5., 3.], [2., ::std::f64::NAN, 6.]]; assert_eq!(a.argmin_skipnan(), Ok((0, 0))); let a = array![[::std::f64::NAN, 5., 3.], [2., ::std::f64::NAN, 6.]]; assert_eq!(a.argmin_skipnan(), Ok((1, 0))); let a: Array2 = array![[], []]; assert_eq!(a.argmin_skipnan(), Err(EmptyInput)); let a = arr2(&[[::std::f64::NAN; 2]; 2]); assert_eq!(a.argmin_skipnan(), Err(EmptyInput)); } #[quickcheck] fn argmin_skipnan_matches_min_skipnan(data: Vec>) -> bool { let a = Array1::from(data); let min = a.min_skipnan(); let argmin = a.argmin_skipnan(); if min.is_none() { argmin == Err(EmptyInput) } else { a[argmin.unwrap()] == *min } } #[test] fn test_min() { let a = array![[1, 5, 3], [2, 0, 6]]; assert_eq!(a.min(), Ok(&0)); let a = array![[1., 5., 3.], [2., 0., 6.]]; assert_eq!(a.min(), Ok(&0.)); let a = array![[1., 5., 3.], [2., ::std::f64::NAN, 6.]]; assert_eq!(a.min(), Err(MinMaxError::UndefinedOrder)); } #[test] fn test_min_skipnan() { let a = array![[1., 5., 3.], [2., 0., 6.]]; assert_eq!(a.min_skipnan(), &0.); let a = array![[1., 5., 3.], [2., ::std::f64::NAN, 6.]]; assert_eq!(a.min_skipnan(), &1.); } #[test] fn test_min_skipnan_all_nan() { let a = arr2(&[[::std::f64::NAN; 3]; 2]); assert!(a.min_skipnan().is_nan()); } #[test] fn test_argmax() { let a = array![[1, 5, 3], [2, 0, 6]]; assert_eq!(a.argmax(), Ok((1, 2))); let a = array![[1., 5., 3.], [2., 0., 6.]]; assert_eq!(a.argmax(), Ok((1, 2))); let a = array![[1., 5., 3.], [2., ::std::f64::NAN, 6.]]; assert_eq!(a.argmax(), Err(MinMaxError::UndefinedOrder)); let a: Array2 = array![[], []]; assert_eq!(a.argmax(), Err(MinMaxError::EmptyInput)); } 
#[quickcheck] fn argmax_matches_max(data: Vec) -> bool { let a = Array1::from(data); a.argmax().map(|i| &a[i]) == a.max() } #[test] fn test_argmax_skipnan() { let a = array![[1., 5., 3.], [2., 0., 6.]]; assert_eq!(a.argmax_skipnan(), Ok((1, 2))); let a = array![[1., 5., 3.], [2., ::std::f64::NAN, ::std::f64::NAN]]; assert_eq!(a.argmax_skipnan(), Ok((0, 1))); let a = array![ [::std::f64::NAN, ::std::f64::NAN, 3.], [2., ::std::f64::NAN, 6.] ]; assert_eq!(a.argmax_skipnan(), Ok((1, 2))); let a: Array2 = array![[], []]; assert_eq!(a.argmax_skipnan(), Err(EmptyInput)); let a = arr2(&[[::std::f64::NAN; 2]; 2]); assert_eq!(a.argmax_skipnan(), Err(EmptyInput)); } #[quickcheck] fn argmax_skipnan_matches_max_skipnan(data: Vec>) -> bool { let a = Array1::from(data); let max = a.max_skipnan(); let argmax = a.argmax_skipnan(); if max.is_none() { argmax == Err(EmptyInput) } else { a[argmax.unwrap()] == *max } } #[test] fn test_max() { let a = array![[1, 5, 7], [2, 0, 6]]; assert_eq!(a.max(), Ok(&7)); let a = array![[1., 5., 7.], [2., 0., 6.]]; assert_eq!(a.max(), Ok(&7.)); let a = array![[1., 5., 7.], [2., ::std::f64::NAN, 6.]]; assert_eq!(a.max(), Err(MinMaxError::UndefinedOrder)); } #[test] fn test_max_skipnan() { let a = array![[1., 5., 7.], [2., 0., 6.]]; assert_eq!(a.max_skipnan(), &7.); let a = array![[1., 5., 7.], [2., ::std::f64::NAN, 6.]]; assert_eq!(a.max_skipnan(), &7.); } #[test] fn test_max_skipnan_all_nan() { let a = arr2(&[[::std::f64::NAN; 3]; 2]); assert!(a.max_skipnan().is_nan()); } #[test] fn test_quantile_axis_mut_with_odd_axis_length() { let mut a = arr2(&[[1, 3, 2, 10], [2, 4, 3, 11], [3, 5, 6, 12]]); let p = a.quantile_axis_mut(Axis(0), n64(0.5), &Lower).unwrap(); assert!(p == a.index_axis(Axis(0), 1)); } #[test] fn test_quantile_axis_mut_with_zero_axis_length() { let mut a = Array2::::zeros((5, 0)); assert_eq!( a.quantile_axis_mut(Axis(1), n64(0.5), &Lower), Err(QuantileError::EmptyInput) ); } #[test] fn test_quantile_axis_mut_with_empty_array() { let mut 
a = Array2::::zeros((5, 0)); let p = a.quantile_axis_mut(Axis(0), n64(0.5), &Lower).unwrap(); assert_eq!(p.shape(), &[0]); } #[test] fn test_quantile_axis_mut_with_even_axis_length() { let mut b = arr2(&[[1, 3, 2, 10], [2, 4, 3, 11], [3, 5, 6, 12], [4, 6, 7, 13]]); let q = b.quantile_axis_mut(Axis(0), n64(0.5), &Lower).unwrap(); assert!(q == b.index_axis(Axis(0), 1)); } #[test] fn test_quantile_axis_mut_to_get_minimum() { let mut b = arr2(&[[1, 3, 22, 10]]); let q = b.quantile_axis_mut(Axis(1), n64(0.), &Lower).unwrap(); assert!(q == arr1(&[1])); } #[test] fn test_quantile_axis_mut_to_get_maximum() { let mut b = arr1(&[1, 3, 22, 10]); let q = b.quantile_axis_mut(Axis(0), n64(1.), &Lower).unwrap(); assert!(q == arr0(22)); } #[test] fn test_quantile_axis_skipnan_mut_higher_opt_i32() { let mut a = arr2(&[[Some(4), Some(2), None, Some(1), Some(5)], [None; 5]]); let q = a .quantile_axis_skipnan_mut(Axis(1), n64(0.6), &Higher) .unwrap(); assert_eq!(q.shape(), &[2]); assert_eq!(q[0], Some(4)); assert!(q[1].is_none()); } #[test] fn test_quantile_axis_skipnan_mut_nearest_opt_i32() { let mut a = arr2(&[[Some(4), Some(2), None, Some(1), Some(5)], [None; 5]]); let q = a .quantile_axis_skipnan_mut(Axis(1), n64(0.6), &Nearest) .unwrap(); assert_eq!(q.shape(), &[2]); assert_eq!(q[0], Some(4)); assert!(q[1].is_none()); } #[test] fn test_quantile_axis_skipnan_mut_midpoint_opt_i32() { let mut a = arr2(&[[Some(4), Some(2), None, Some(1), Some(5)], [None; 5]]); let q = a .quantile_axis_skipnan_mut(Axis(1), n64(0.6), &Midpoint) .unwrap(); assert_eq!(q.shape(), &[2]); assert_eq!(q[0], Some(3)); assert!(q[1].is_none()); } #[test] fn test_quantile_axis_skipnan_mut_linear_f64() { let mut a = arr2(&[[1., 2., ::std::f64::NAN, 3.], [::std::f64::NAN; 4]]); let q = a .quantile_axis_skipnan_mut(Axis(1), n64(0.75), &Linear) .unwrap(); assert_eq!(q.shape(), &[2]); assert!((q[0] - 2.5).abs() < 1e-12); assert!(q[1].is_nan()); } #[test] fn test_quantile_axis_skipnan_mut_linear_opt_i32() { let mut a = 
arr2(&[[Some(2), Some(4), None, Some(1)], [None; 4]]); let q = a .quantile_axis_skipnan_mut(Axis(1), n64(0.75), &Linear) .unwrap(); assert_eq!(q.shape(), &[2]); assert_eq!(q[0], Some(3)); assert!(q[1].is_none()); } #[test] fn test_midpoint_overflow() { // Regression test // This triggered an overflow panic with a naive Midpoint implementation: (a+b)/2 let mut a: Array1 = array![129, 130, 130, 131]; let median = a.quantile_mut(n64(0.5), &Midpoint).unwrap(); let expected_median = 130; assert_eq!(median, expected_median); } #[quickcheck] fn test_quantiles_mut(xs: Vec) -> bool { let v = Array::from(xs.clone()); // Unordered list of quantile indexes to look up, with a duplicate let quantile_indexes = Array::from(vec![ n64(0.75), n64(0.90), n64(0.95), n64(0.99), n64(1.), n64(0.), n64(0.25), n64(0.5), n64(0.5), ]); let mut correct = true; correct &= check_one_interpolation_method_for_quantiles_mut( v.clone(), quantile_indexes.view(), &Linear, ); correct &= check_one_interpolation_method_for_quantiles_mut( v.clone(), quantile_indexes.view(), &Higher, ); correct &= check_one_interpolation_method_for_quantiles_mut( v.clone(), quantile_indexes.view(), &Lower, ); correct &= check_one_interpolation_method_for_quantiles_mut( v.clone(), quantile_indexes.view(), &Midpoint, ); correct &= check_one_interpolation_method_for_quantiles_mut( v.clone(), quantile_indexes.view(), &Nearest, ); correct } fn check_one_interpolation_method_for_quantiles_mut( mut v: Array1, quantile_indexes: ArrayView1<'_, N64>, interpolate: &impl Interpolate, ) -> bool { let bulk_quantiles = v.clone().quantiles_mut(&quantile_indexes, interpolate); if v.len() == 0 { bulk_quantiles.is_err() } else { let bulk_quantiles = bulk_quantiles.unwrap(); izip!(quantile_indexes, &bulk_quantiles).all(|(&quantile_index, &quantile)| { quantile == v.quantile_mut(quantile_index, interpolate).unwrap() }) } } #[quickcheck] fn test_quantiles_axis_mut(mut xs: Vec) -> bool { // We want a square matrix let axis_length = (xs.len() as 
f64).sqrt().floor() as usize; xs.truncate(axis_length * axis_length); let m = Array::from_shape_vec((axis_length, axis_length), xs).unwrap(); // Unordered list of quantile indexes to look up, with a duplicate let quantile_indexes = Array::from(vec![ n64(0.75), n64(0.90), n64(0.95), n64(0.99), n64(1.), n64(0.), n64(0.25), n64(0.5), n64(0.5), ]); // Test out all interpolation methods let mut correct = true; correct &= check_one_interpolation_method_for_quantiles_axis_mut( m.clone(), quantile_indexes.view(), Axis(0), &Linear, ); correct &= check_one_interpolation_method_for_quantiles_axis_mut( m.clone(), quantile_indexes.view(), Axis(0), &Higher, ); correct &= check_one_interpolation_method_for_quantiles_axis_mut( m.clone(), quantile_indexes.view(), Axis(0), &Lower, ); correct &= check_one_interpolation_method_for_quantiles_axis_mut( m.clone(), quantile_indexes.view(), Axis(0), &Midpoint, ); correct &= check_one_interpolation_method_for_quantiles_axis_mut( m.clone(), quantile_indexes.view(), Axis(0), &Nearest, ); correct } fn check_one_interpolation_method_for_quantiles_axis_mut( mut v: Array2, quantile_indexes: ArrayView1<'_, N64>, axis: Axis, interpolate: &impl Interpolate, ) -> bool { let bulk_quantiles = v .clone() .quantiles_axis_mut(axis, &quantile_indexes, interpolate); if v.len() == 0 { bulk_quantiles.is_err() } else { let bulk_quantiles = bulk_quantiles.unwrap(); izip!(quantile_indexes, bulk_quantiles.axis_iter(axis)).all( |(&quantile_index, quantile)| { quantile == v.quantile_axis_mut(axis, quantile_index, interpolate) .unwrap() }, ) } } ================================================ FILE: tests/sort.rs ================================================ use ndarray::prelude::*; use ndarray_stats::Sort1dExt; use quickcheck_macros::quickcheck; #[test] fn test_partition_mut() { let mut l = vec![ arr1(&[1, 1, 1, 1, 1]), arr1(&[1, 3, 2, 10, 10]), arr1(&[2, 3, 4, 1]), arr1(&[ 355, 453, 452, 391, 289, 343, 44, 154, 271, 44, 314, 276, 160, 469, 191, 138, 163, 308, 
395, 3, 416, 391, 210, 354, 200,
        ]),
        arr1(&[
            84, 192, 216, 159, 89, 296, 35, 213, 456, 278, 98, 52, 308, 418, 329, 173, 286, 106,
            366, 129, 125, 450, 23, 463, 151,
        ]),
    ];
    for a in l.iter_mut() {
        let n = a.len();
        let pivot_index = n - 1;
        // The elements are plain integers (`Copy`), so no explicit clone is needed.
        let pivot_value = a[pivot_index];
        let partition_index = a.partition_mut(pivot_index);
        // Everything before the returned index must be strictly smaller than the pivot...
        for i in 0..partition_index {
            assert!(a[i] < pivot_value);
        }
        // ...the pivot itself must sit at the returned index...
        assert_eq!(a[partition_index], pivot_value);
        // ...and everything after it must be greater than or equal to the pivot.
        for j in (partition_index + 1)..n {
            assert!(pivot_value <= a[j]);
        }
    }
}

/// Checks `get_from_sorted_mut` against hand-computed order statistics of a
/// small fixed array.
#[test]
fn test_sorted_get_mut() {
    let a = arr1(&[1, 3, 2, 10]);
    // Each lookup runs on a fresh copy of `a`.
    let j = a.clone().view_mut().get_from_sorted_mut(2);
    assert_eq!(j, 3);
    let j = a.clone().view_mut().get_from_sorted_mut(1);
    assert_eq!(j, 2);
    let j = a.clone().view_mut().get_from_sorted_mut(3);
    assert_eq!(j, 10);
}

/// Property test: requesting every index (with duplicates) through
/// `get_many_from_sorted_mut` must yield the fully sorted input, with keys
/// coming back in increasing order.
// NOTE(review): the element type of `xs` was lost in extraction; `i64` is
// assumed — confirm against upstream.
#[quickcheck]
fn test_sorted_get_many_mut(mut xs: Vec<i64>) -> bool {
    let n = xs.len();
    if n == 0 {
        true
    } else {
        let mut v = Array::from(xs.clone());

        // Insert each index twice, to get a set of indexes with duplicates, not sorted
        let mut indexes: Vec<usize> = (0..n).collect();
        indexes.extend(0..n);

        let mut sorted_v = Vec::with_capacity(n);
        for (i, (key, value)) in v
            .get_many_from_sorted_mut(&Array::from(indexes))
            .into_iter()
            .enumerate()
        {
            // The i-th entry yielded must be the one for index i.
            if i != key {
                return false;
            }
            sorted_v.push(value);
        }
        xs.sort();
        println!("Sorted: {:?}.
Truth: {:?}", sorted_v, xs);
        xs == sorted_v
    }
}

/// Property test: reading indexes `0..n` one by one through
/// `get_from_sorted_mut` must reproduce the sorted input.
// NOTE(review): the element type of `xs` was lost in extraction; `i64` is
// assumed — confirm against upstream.
#[quickcheck]
fn test_sorted_get_mut_as_sorting_algorithm(mut xs: Vec<i64>) -> bool {
    let n = xs.len();
    if n == 0 {
        true
    } else {
        let mut v = Array::from(xs.clone());
        let sorted_v: Vec<_> = (0..n).map(|i| v.get_from_sorted_mut(i)).collect();
        xs.sort();
        xs == sorted_v
    }
}

================================================
FILE: tests/summary_statistics.rs
================================================
use approx::{abs_diff_eq, assert_abs_diff_eq};
use ndarray::{arr0, array, Array, Array1, Array2, Axis};
use ndarray_rand::rand_distr::Uniform;
use ndarray_rand::RandomExt;
use ndarray_stats::{
    errors::{EmptyInput, MultiInputError, ShapeMismatch},
    SummaryStatisticsExt,
};
use noisy_float::types::N64;
use quickcheck::{quickcheck, TestResult};
use std::f64;

/// A NaN in either the data or the weights must make every summary statistic
/// checked here evaluate to NaN.
#[test]
fn test_with_nan_values() {
    let a = array![f64::NAN, 1.];
    let weights = array![1.0, f64::NAN];
    assert!(a.mean().unwrap().is_nan());
    assert!(a.weighted_mean(&weights).unwrap().is_nan());
    assert!(a.weighted_sum(&weights).unwrap().is_nan());
    assert!(a
        .weighted_mean_axis(Axis(0), &weights)
        .unwrap()
        .into_scalar()
        .is_nan());
    assert!(a
        .weighted_sum_axis(Axis(0), &weights)
        .unwrap()
        .into_scalar()
        .is_nan());
    assert!(a.harmonic_mean().unwrap().is_nan());
    assert!(a.geometric_mean().unwrap().is_nan());
    assert!(a.weighted_var(&weights, 0.0).unwrap().is_nan());
    assert!(a.weighted_std(&weights, 0.0).unwrap().is_nan());
    assert!(a
        .weighted_var_axis(Axis(0), &weights, 0.0)
        .unwrap()
        .into_scalar()
        .is_nan());
    assert!(a
        .weighted_std_axis(Axis(0), &weights, 0.0)
        .unwrap()
        .into_scalar()
        .is_nan());
}

/// On an empty `f64` array the mean-like and variance-like statistics must
/// report empty input, while the weighted sums are still defined.
#[test]
fn test_with_empty_array_of_floats() {
    let a: Array1<f64> = array![];
    let weights = array![1.0];
    assert_eq!(a.mean(), None);
    assert_eq!(a.weighted_mean(&weights), Err(MultiInputError::EmptyInput));
    assert_eq!(
        a.weighted_mean_axis(Axis(0), &weights),
        Err(MultiInputError::EmptyInput)
    );
    assert_eq!(a.harmonic_mean(), Err(EmptyInput));
    assert_eq!(a.geometric_mean(), Err(EmptyInput));
assert_eq!( a.weighted_var(&weights, 0.0), Err(MultiInputError::EmptyInput) ); assert_eq!( a.weighted_std(&weights, 0.0), Err(MultiInputError::EmptyInput) ); assert_eq!( a.weighted_var_axis(Axis(0), &weights, 0.0), Err(MultiInputError::EmptyInput) ); assert_eq!( a.weighted_std_axis(Axis(0), &weights, 0.0), Err(MultiInputError::EmptyInput) ); // The sum methods accept empty arrays assert_eq!(a.weighted_sum(&array![]), Ok(0.0)); assert_eq!(a.weighted_sum_axis(Axis(0), &array![]), Ok(arr0(0.0))); } #[test] fn test_with_empty_array_of_noisy_floats() { let a: Array1 = array![]; let weights = array![]; assert_eq!(a.mean(), None); assert_eq!(a.weighted_mean(&weights), Err(MultiInputError::EmptyInput)); assert_eq!( a.weighted_mean_axis(Axis(0), &weights), Err(MultiInputError::EmptyInput) ); assert_eq!(a.harmonic_mean(), Err(EmptyInput)); assert_eq!(a.geometric_mean(), Err(EmptyInput)); assert_eq!( a.weighted_var(&weights, N64::new(0.0)), Err(MultiInputError::EmptyInput) ); assert_eq!( a.weighted_std(&weights, N64::new(0.0)), Err(MultiInputError::EmptyInput) ); assert_eq!( a.weighted_var_axis(Axis(0), &weights, N64::new(0.0)), Err(MultiInputError::EmptyInput) ); assert_eq!( a.weighted_std_axis(Axis(0), &weights, N64::new(0.0)), Err(MultiInputError::EmptyInput) ); // The sum methods accept empty arrays assert_eq!(a.weighted_sum(&weights), Ok(N64::new(0.0))); assert_eq!( a.weighted_sum_axis(Axis(0), &weights), Ok(arr0(N64::new(0.0))) ); } #[test] fn test_with_array_of_floats() { let a: Array1 = array![ 0.99889651, 0.0150731, 0.28492482, 0.83819218, 0.48413156, 0.80710412, 0.41762936, 0.22879429, 0.43997224, 0.23831807, 0.02416466, 0.6269962, 0.47420614, 0.56275487, 0.78995021, 0.16060581, 0.64635041, 0.34876609, 0.78543249, 0.19938356, 0.34429457, 0.88072369, 0.17638164, 0.60819363, 0.250392, 0.69912532, 0.78855523, 0.79140914, 0.85084218, 0.31839879, 0.63381769, 0.22421048, 0.70760302, 0.99216018, 0.80199153, 0.19239188, 0.61356023, 0.31505352, 0.06120481, 0.66417377, 
0.63608897, 0.84959691, 0.43599069, 0.77867775, 0.88267754, 0.83003623, 0.67016118,
        0.67547638, 0.65220036, 0.68043427
    ];
    // Computed using NumPy
    let expected_mean = 0.5475494059146699;
    let expected_weighted_mean = 0.6782420496397121;
    let expected_weighted_var = 0.04306695637838332;

    // Computed using SciPy
    let expected_harmonic_mean = 0.21790094950226022;
    let expected_geometric_mean = 0.4345897639796527;

    assert_abs_diff_eq!(a.mean().unwrap(), expected_mean, epsilon = 1e-9);
    assert_abs_diff_eq!(
        a.harmonic_mean().unwrap(),
        expected_harmonic_mean,
        epsilon = 1e-7
    );
    assert_abs_diff_eq!(
        a.geometric_mean().unwrap(),
        expected_geometric_mean,
        epsilon = 1e-12
    );

    // Input array used as weights, normalized
    let weights = &a / a.sum();
    // With normalized weights the weighted sum is compared against the
    // expected weighted mean.
    assert_abs_diff_eq!(
        a.weighted_sum(&weights).unwrap(),
        expected_weighted_mean,
        epsilon = 1e-12
    );
    assert_abs_diff_eq!(
        a.weighted_var(&weights, 0.0).unwrap(),
        expected_weighted_var,
        epsilon = 1e-12
    );
    assert_abs_diff_eq!(
        a.weighted_std(&weights, 0.0).unwrap(),
        expected_weighted_var.sqrt(),
        epsilon = 1e-12
    );

    // Reshape the same 50 values to (2, 5, 5) to exercise the per-axis variants.
    let data = a.into_shape_with_order((2, 5, 5)).unwrap();
    let weights = array![0.1, 0.5, 0.25, 0.15, 0.2];
    assert_abs_diff_eq!(
        data.weighted_mean_axis(Axis(1), &weights).unwrap(),
        array![
            [0.50202721, 0.53347361, 0.29086033, 0.56995637, 0.37087139],
            [0.58028328, 0.50485216, 0.59349973, 0.70308937, 0.72280630]
        ],
        epsilon = 1e-8
    );
    assert_abs_diff_eq!(
        data.weighted_mean_axis(Axis(2), &weights).unwrap(),
        array![
            [0.33434378, 0.38365259, 0.56405781, 0.48676574, 0.55016179],
            [0.71112376, 0.55134174, 0.45566513, 0.74228516, 0.68405851]
        ],
        epsilon = 1e-8
    );
    assert_abs_diff_eq!(
        data.weighted_sum_axis(Axis(1), &weights).unwrap(),
        array![
            [0.60243266, 0.64016833, 0.34903240, 0.68394765, 0.44504567],
            [0.69633993, 0.60582259, 0.71219968, 0.84370724, 0.86736757]
        ],
        epsilon = 1e-8
    );
    assert_abs_diff_eq!(
        data.weighted_sum_axis(Axis(2), &weights).unwrap(),
        array![
            [0.40121254, 0.46038311, 0.67686937, 0.58411889, 0.66019415],
            [0.85334851, 0.66161009,
0.54679815, 0.89074219, 0.82087021]
        ],
        epsilon = 1e-8
    );
}

/// Weighted sums over arrays that have a zero-length dimension: matching
/// weight shapes succeed, mismatching ones report a `ShapeMismatch`.
// NOTE(review): the element type of `a` was lost in extraction; an integer type
// is required by the `from_elem(.., 0)` comparisons and `usize` is assumed —
// confirm against upstream.
#[test]
fn weighted_sum_dimension_zero() {
    let a = Array2::<usize>::zeros((0, 20));
    assert_eq!(
        a.weighted_sum_axis(Axis(0), &Array1::zeros(0)).unwrap(),
        Array1::from_elem(20, 0)
    );
    assert_eq!(
        a.weighted_sum_axis(Axis(1), &Array1::zeros(20)).unwrap(),
        Array1::from_elem(0, 0)
    );
    assert_eq!(
        a.weighted_sum_axis(Axis(0), &Array1::zeros(1)),
        Err(MultiInputError::ShapeMismatch(ShapeMismatch {
            first_shape: vec![0, 20],
            second_shape: vec![1]
        }))
    );
    assert_eq!(
        a.weighted_sum(&Array2::zeros((10, 20))),
        Err(MultiInputError::ShapeMismatch(ShapeMismatch {
            first_shape: vec![0, 20],
            second_shape: vec![10, 20]
        }))
    );
}

/// Property test: with uniform weights `1/n`, the mean, the weighted mean and
/// the weighted sum must coincide (up to `1e-9`).
#[test]
fn mean_eq_if_uniform_weights() {
    fn prop(a: Vec<f64>) -> TestResult {
        if a.is_empty() {
            return TestResult::discard();
        }
        let a = Array1::from(a);
        let weights = Array1::from_elem(a.len(), 1.0 / a.len() as f64);
        let m = a.mean().unwrap();
        let wm = a.weighted_mean(&weights).unwrap();
        let ws = a.weighted_sum(&weights).unwrap();
        TestResult::from_bool(
            abs_diff_eq!(m, wm, epsilon = 1e-9) && abs_diff_eq!(wm, ws, epsilon = 1e-9),
        )
    }
    quickcheck(prop as fn(Vec<f64>) -> TestResult);
}

/// Property test: along axis 0 of a `(depth, 3, 4)` array with uniform weights
/// `1/depth`, `mean_axis`, `weighted_mean_axis` and `weighted_sum_axis` must
/// coincide (up to `1e-12`).
#[test]
fn mean_axis_eq_if_uniform_weights() {
    fn prop(mut a: Vec<f64>) -> TestResult {
        // At least two full (3, 4) slices are required.
        if a.len() < 24 {
            return TestResult::discard();
        }
        let depth = a.len() / 12;
        a.truncate(depth * 3 * 4);
        let weights = Array1::from_elem(depth, 1.0 / depth as f64);
        let a = Array1::from(a)
            .into_shape_with_order((depth, 3, 4))
            .unwrap();
        let ma = a.mean_axis(Axis(0)).unwrap();
        let wm = a.weighted_mean_axis(Axis(0), &weights).unwrap();
        let ws = a.weighted_sum_axis(Axis(0), &weights).unwrap();
        // Both comparisons use the same tight tolerance; the second one used to
        // be `1e12`, which accepted any pair of finite values.
        TestResult::from_bool(
            abs_diff_eq!(ma, wm, epsilon = 1e-12) && abs_diff_eq!(wm, ws, epsilon = 1e-12),
        )
    }
    quickcheck(prop as fn(Vec<f64>) -> TestResult);
}

/// Property test: with uniform weights, `weighted_var` must match `var_axis`.
#[test]
fn weighted_var_eq_var_if_uniform_weight() {
    fn prop(a: Vec<f64>) -> TestResult {
        if a.is_empty() {
            return TestResult::discard();
        }
        let a = Array1::from(a);
        let weights = Array1::from_elem(a.len(), 1.0 / a.len() as f64);
        let
weighted_var = a.weighted_var(&weights, 0.0).unwrap();
        let var = a.var_axis(Axis(0), 0.0).into_scalar();
        TestResult::from_bool(abs_diff_eq!(weighted_var, var, epsilon = 1e-10))
    }
    quickcheck(prop as fn(Vec<f64>) -> TestResult);
}

/// Property test: `weighted_var_axis` must agree with a straightforward
/// two-pass computation (weighted mean of squared deviations from the
/// weighted mean) along each of the three axes.
#[test]
fn weighted_var_algo_eq_simple_algo() {
    fn prop(mut a: Vec<f64>) -> TestResult {
        // At least two full (3, 4) slices are required.
        if a.len() < 24 {
            return TestResult::discard();
        }
        let depth = a.len() / 12;
        a.truncate(depth * 3 * 4);
        let a = Array1::from(a)
            .into_shape_with_order((depth, 3, 4))
            .unwrap();
        let mut success = true;
        for axis in 0..3 {
            let axis = Axis(axis);

            let weights = Array::random(a.len_of(axis), Uniform::new(0.0, 1.0).unwrap());
            // Re-insert the reduced axis so the mean broadcasts against `a`.
            let mean = a
                .weighted_mean_axis(axis, &weights)
                .unwrap()
                .insert_axis(axis);
            let res_1_pass = a.weighted_var_axis(axis, &weights, 0.0).unwrap();
            let res_2_pass = (&a - &mean)
                .mapv_into(|v| v.powi(2))
                .weighted_mean_axis(axis, &weights)
                .unwrap();
            success &= abs_diff_eq!(res_1_pass, res_2_pass, epsilon = 1e-10);
        }
        TestResult::from_bool(success)
    }
    quickcheck(prop as fn(Vec<f64>) -> TestResult);
}

/// Central moments of an empty array must report empty input, for both the
/// single-order and the bulk method.
#[test]
fn test_central_moment_with_empty_array_of_floats() {
    let a: Array1<f64> = array![];
    for order in 0..=3 {
        assert_eq!(a.central_moment(order), Err(EmptyInput));
        assert_eq!(a.central_moments(order), Err(EmptyInput));
    }
}

/// The zeroth central moment of a random sample must be exactly one.
#[test]
fn test_zeroth_central_moment_is_one() {
    let n = 50;
    let bound: f64 = 200.;
    let a = Array::random(n, Uniform::new(-bound.abs(), bound.abs()).unwrap());
    assert_eq!(a.central_moment(0).unwrap(), 1.);
}

/// The first central moment of a random sample must be exactly zero.
#[test]
fn test_first_central_moment_is_zero() {
    let n = 50;
    let bound: f64 = 200.;
    let a = Array::random(n, Uniform::new(-bound.abs(), bound.abs()).unwrap());
    assert_eq!(a.central_moment(1).unwrap(), 0.);
}

/// Compares central moments of orders 0 through 6 of a fixed sample with
/// reference values.
#[test]
fn test_central_moments() {
    let a: Array1<f64> = array![
        0.07820559, 0.5026185, 0.80935324, 0.39384033, 0.9483038, 0.62516215, 0.90772261,
        0.87329831, 0.60267392, 0.2960298, 0.02810356, 0.31911966, 0.86705506, 0.96884832,
        0.2222465, 0.42162446, 0.99909868, 0.47619762, 0.91696979, 0.9972741, 0.09891734,
        0.76934818, 0.77566862,
0.7692585, 0.2235759, 0.44821286, 0.79732186, 0.04804275, 0.87863238, 0.1111003,
        0.6653943, 0.44386445, 0.2133176, 0.39397086, 0.4374617, 0.95896624, 0.57850146,
        0.29301706, 0.02329879, 0.2123203, 0.62005503, 0.996492, 0.5342986, 0.97822099,
        0.5028445, 0.6693834, 0.14256682, 0.52724704, 0.73482372, 0.1809703,
    ];
    // Computed using scipy.stats.moment
    let expected_moments = vec![
        1.,
        0.,
        0.09339920262960291,
        -0.0026849636727735186,
        0.015403769257729755,
        -0.001204176487006564,
        0.002976822584939186,
    ];
    // The position in `expected_moments` is the moment's order.
    for (order, expected_moment) in expected_moments.iter().enumerate() {
        assert_abs_diff_eq!(
            a.central_moment(order as u16).unwrap(),
            expected_moment,
            epsilon = 1e-8
        );
    }
}

/// The bulk `central_moments` method must return exactly what
/// `central_moment` returns for each order up to the requested one.
#[test]
fn test_bulk_central_moments() {
    // Test that the bulk method is coherent with the non-bulk method
    let n = 50;
    let bound: f64 = 200.;
    let a = Array::random(n, Uniform::new(-bound.abs(), bound.abs()).unwrap());
    let order = 10;
    let central_moments = a.central_moments(order).unwrap();
    for i in 0..=order {
        assert_eq!(a.central_moment(i).unwrap(), central_moments[i as usize]);
    }
}

/// Skewness and kurtosis of an empty array must report empty input.
#[test]
fn test_kurtosis_and_skewness_is_none_with_empty_array_of_floats() {
    let a: Array1<f64> = array![];
    assert_eq!(a.skewness(), Err(EmptyInput));
    assert_eq!(a.kurtosis(), Err(EmptyInput));
}

/// Compares kurtosis and skewness of a fixed sample with reference values.
#[test]
fn test_kurtosis_and_skewness() {
    let a: Array1<f64> = array![
        0.33310096, 0.98757449, 0.9789796, 0.96738114, 0.43545674, 0.06746873, 0.23706562,
        0.04241815, 0.38961714, 0.52421271, 0.93430327, 0.33911604, 0.05112372, 0.5013455,
        0.05291507, 0.62511183, 0.20749633, 0.22132433, 0.14734804, 0.51960608, 0.00449208,
        0.4093339, 0.2237519, 0.28070469, 0.7887231, 0.92224523, 0.43454188, 0.18335111,
        0.08646856, 0.87979847, 0.25483457, 0.99975627, 0.52712442, 0.41163279, 0.85162594,
        0.52618733, 0.75815023, 0.30640695, 0.14205781, 0.59695813, 0.851331, 0.39524328,
        0.73965373, 0.4007615, 0.02133069, 0.92899207, 0.79878191, 0.38947334, 0.22042183,
        0.77768353,
    ];
    // Computed using scipy.stats.kurtosis(a, fisher=False)
    let expected_kurtosis =
1.821933711687523; // Computed using scipy.stats.skew let expected_skewness = 0.2604785422878771; let kurtosis = a.kurtosis().unwrap(); let skewness = a.skewness().unwrap(); assert_abs_diff_eq!(kurtosis, expected_kurtosis, epsilon = 1e-12); assert_abs_diff_eq!(skewness, expected_skewness, epsilon = 1e-8); }