Showing preview only (686K chars total). Download the full file or copy to clipboard to get everything.
Repository: clbustos/statsample
Branch: master
Commit: d5caf4ecf82c
Files: 157
Total size: 643.6 KB
Directory structure:
gitextract_b74amxs6/
├── .gitignore
├── .travis.yml
├── Gemfile
├── History.txt
├── LICENSE.txt
├── Manifest.txt
├── README.md
├── Rakefile
├── benchmarks/
│ ├── correlation_matrix_15_variables.rb
│ ├── correlation_matrix_5_variables.rb
│ ├── correlation_matrix_methods/
│ │ ├── correlation_matrix.ds
│ │ ├── correlation_matrix.html
│ │ ├── correlation_matrix.rb
│ │ ├── correlation_matrix.xls
│ │ ├── correlation_matrix_gsl_ruby.ods
│ │ ├── correlation_matrix_with_graphics.ods
│ │ └── results.ds
│ ├── factor_map.rb
│ └── helpers_benchmark.rb
├── data/
│ └── locale/
│ └── es/
│ └── LC_MESSAGES/
│ └── statsample.mo
├── doc_latex/
│ └── manual/
│ └── equations.tex
├── examples/
│ ├── boxplot.rb
│ ├── correlation_matrix.rb
│ ├── dataset.rb
│ ├── dominance_analysis.rb
│ ├── dominance_analysis_bootstrap.rb
│ ├── histogram.rb
│ ├── icc.rb
│ ├── levene.rb
│ ├── multiple_regression.rb
│ ├── multivariate_correlation.rb
│ ├── parallel_analysis.rb
│ ├── polychoric.rb
│ ├── principal_axis.rb
│ ├── reliability.rb
│ ├── scatterplot.rb
│ ├── t_test.rb
│ ├── tetrachoric.rb
│ ├── u_test.rb
│ ├── vector.rb
│ └── velicer_map_test.rb
├── grab_references.rb
├── lib/
│ ├── spss.rb
│ ├── statsample/
│ │ ├── analysis/
│ │ │ ├── suite.rb
│ │ │ └── suitereportbuilder.rb
│ │ ├── analysis.rb
│ │ ├── anova/
│ │ │ ├── contrast.rb
│ │ │ ├── oneway.rb
│ │ │ └── twoway.rb
│ │ ├── anova.rb
│ │ ├── bivariate/
│ │ │ └── pearson.rb
│ │ ├── bivariate.rb
│ │ ├── codification.rb
│ │ ├── converter/
│ │ │ ├── csv.rb
│ │ │ └── spss.rb
│ │ ├── converters.rb
│ │ ├── crosstab.rb
│ │ ├── dataset.rb
│ │ ├── dominanceanalysis/
│ │ │ └── bootstrap.rb
│ │ ├── dominanceanalysis.rb
│ │ ├── factor/
│ │ │ ├── map.rb
│ │ │ ├── parallelanalysis.rb
│ │ │ ├── pca.rb
│ │ │ ├── principalaxis.rb
│ │ │ └── rotation.rb
│ │ ├── factor.rb
│ │ ├── graph/
│ │ │ ├── boxplot.rb
│ │ │ ├── histogram.rb
│ │ │ └── scatterplot.rb
│ │ ├── graph.rb
│ │ ├── histogram.rb
│ │ ├── matrix.rb
│ │ ├── multiset.rb
│ │ ├── regression/
│ │ │ ├── multiple/
│ │ │ │ ├── alglibengine.rb
│ │ │ │ ├── baseengine.rb
│ │ │ │ ├── gslengine.rb
│ │ │ │ ├── matrixengine.rb
│ │ │ │ └── rubyengine.rb
│ │ │ ├── multiple.rb
│ │ │ └── simple.rb
│ │ ├── regression.rb
│ │ ├── reliability/
│ │ │ ├── icc.rb
│ │ │ ├── multiscaleanalysis.rb
│ │ │ ├── scaleanalysis.rb
│ │ │ └── skillscaleanalysis.rb
│ │ ├── reliability.rb
│ │ ├── resample.rb
│ │ ├── rserve_extension.rb
│ │ ├── shorthand.rb
│ │ ├── srs.rb
│ │ ├── test/
│ │ │ ├── bartlettsphericity.rb
│ │ │ ├── chisquare.rb
│ │ │ ├── f.rb
│ │ │ ├── kolmogorovsmirnov.rb
│ │ │ ├── levene.rb
│ │ │ ├── t.rb
│ │ │ ├── umannwhitney.rb
│ │ │ └── wilcoxonsignedrank.rb
│ │ ├── test.rb
│ │ ├── vector/
│ │ │ └── gsl.rb
│ │ ├── vector.rb
│ │ └── version.rb
│ └── statsample.rb
├── po/
│ ├── es/
│ │ ├── statsample.mo
│ │ └── statsample.po
│ └── statsample.pot
├── references.txt
├── setup.rb
├── test/
│ ├── fixtures/
│ │ ├── correlation_matrix.rb
│ │ ├── hartman_23.matrix
│ │ ├── repeated_fields.csv
│ │ ├── stock_data.csv
│ │ ├── test_csv.csv
│ │ ├── test_xls.xls
│ │ ├── tetmat_matrix.txt
│ │ └── tetmat_test.txt
│ ├── helpers_tests.rb
│ ├── test_analysis.rb
│ ├── test_anova_contrast.rb
│ ├── test_anovaoneway.rb
│ ├── test_anovatwoway.rb
│ ├── test_anovatwowaywithdataset.rb
│ ├── test_anovawithvectors.rb
│ ├── test_awesome_print_bug.rb
│ ├── test_bartlettsphericity.rb
│ ├── test_bivariate.rb
│ ├── test_codification.rb
│ ├── test_crosstab.rb
│ ├── test_csv.rb
│ ├── test_dataset.rb
│ ├── test_dominance_analysis.rb
│ ├── test_factor.rb
│ ├── test_factor_map.rb
│ ├── test_factor_pa.rb
│ ├── test_ggobi.rb
│ ├── test_gsl.rb
│ ├── test_histogram.rb
│ ├── test_matrix.rb
│ ├── test_multiset.rb
│ ├── test_regression.rb
│ ├── test_reliability.rb
│ ├── test_reliability_icc.rb
│ ├── test_reliability_skillscale.rb
│ ├── test_resample.rb
│ ├── test_rserve_extension.rb
│ ├── test_srs.rb
│ ├── test_statistics.rb
│ ├── test_stest.rb
│ ├── test_stratified.rb
│ ├── test_test_f.rb
│ ├── test_test_kolmogorovsmirnov.rb
│ ├── test_test_t.rb
│ ├── test_umannwhitney.rb
│ ├── test_vector.rb
│ ├── test_wilcoxonsignedrank.rb
│ └── test_xls.rb
└── web/
└── Rakefile
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
doc.yaml
*.swp
*.rbc
coverage
*~
agregar_adsense_a_doc.rb
pkg
doc
.yardoc
examples/images/*
examples/*.html
web/upload_task.rb
.idea
================================================
FILE: .travis.yml
================================================
language:
ruby
rvm:
- '1.9.3'
- '2.0.0'
- '2.1.1'
script:
bundle exec rake test
before_install:
- sudo apt-get update -qq
- sudo apt-get install -y libgsl0-dev r-base r-base-dev
- sudo Rscript -e "install.packages(c('Rserve','irr'),,'http://cran.us.r-project.org')"
================================================
FILE: Gemfile
================================================
source "https://www.rubygems.org"
gem 'minitest'
gem 'rdoc'
gem 'mocha', '0.14.0' #:require=>'mocha/setup'
gem 'shoulda','3.5.0'
gem 'shoulda-matchers','2.2.0'
gem 'hoe'
#gem 'bio-statsample-timeseries'
gem 'reportbuilder'
gem 'dirty-memoize'
gem 'distribution'
gem 'extendmatrix'
gem 'minimization'
gem 'rserve-client'
gem 'rubyvis'
gem 'spreadsheet'
gem 'rb-gsl'
gem 'awesome_print'
================================================
FILE: History.txt
================================================
=== 1.4.0 / 2014-10-11
* Replaced README.txt with README.md
* Replaced File.exists? with File.exist?
+ New Dataset.join to join two dataset based on some fields
* Deleted MLE based regression (Probit and logistic). Now all GML methods are on statsample-glm
=== 1.3.1 / 2014-06-26
* Example referred to a SimpleRegression class which doesn't exist. Updated to working example.
* Merge pull request #15 from Blahah/patch-1
* Updated Gemfile
* Updated README.txt for v1.3.0
* Updated to ruby 2.1.0
=== 1.3.0 / 2013-09-19
* Merge remote-tracking branch 'vpereira/master' into vpereira
* New Wilcoxon Signed Rank test
* Removed TimeSeries class. It is now available in the gem "bio-statsample-timeseries" [GSOC 2013 project :) ]
* Update shoulda support
* Added Bundler dependencies
* improved the csv read method (requires tests)
* open svg on mac osx
=== 1.2.0 / 2011-12-15
* Added support for time series (TimeSeries object): MA, EMA, MACD, acf, lag and delta. [Rob Britton]
* Changed summary attribute to properly display 'b' value for simple linear regression [hstove]
* Merge pull request #6 from hstove/patch-1Changed summary attribute to properly display 'b' value for simple linear regression [Claudio Bustos]
* fix example code for CovariateMatrix [James Kebinger]
=== 1.1.0 / 2011-06-02
* New Statsample::Anova::Contrast
* Jacknife and bootstrap for Vector. Thanks to John Firebaugh for the idea
* Improved Statsample::Analysis API
* Updated CSV.read. Third argument is a Hash with options to CSV class
* Added restriction on Statsample::Excel.read
* Updated Spanish po
* Better summary for Vector
* Improving summary of t related test (confidence interval and estimate output)
* Replaced c for vector on Statsample::Analysis examples
* Added Vector#median_absolute_deviation
* First implementation of Kolmogorov Smirnov test. Returns correct D value, but without Kolmogorov distribution isn't very useful.
=== 1.0.1 / 2011-01-28
* Updated Spanish po.
* Update distribution gem dependence. On Ruby 1.8.7, distribution 0.2.0 raises an error.
=== 1.0.0 / 2011-01-27
* Added Statsample::Analysis, a beautiful DSL to perform fast statistical analysis using statsample. See directory /examples
* Created benchmarks directory
* Removed Distribution module from statsample and moved to a gem. Changes on code to reflect new API
* Optimized simple regression. Better library detection
* New 'should_with_gsl' to test methods with gsl. Refactored Factor::MAP
* Almost complete GSL cleanup on Vector
* Updated some doc on Vector
* Used GSL::Matrix on Factor classes when available
* SkillScaleAnalysis doesn't crash with one or more vectors with 0 variance
* Modified examples using Statsample::Analysis
* Simplified eigen calculations
* Updated some examples. Added correlation matrix speed suite
* Correlation matrix optimized. Better specs
* Optimized correlation matrix. Use gsl matrix algebra or pairwise correlations depending on empiric calculated equations. See benchmarks/correlation_matrix.rb to see implementation of calculation
* Moved tests fixtures from data to test/fixtures
* Fixed some errors on tests
* Bug fix: constant_se on binomial regression have an error
* All test should work on ruby 1.9.3
* New Vector.[] and Vector.new_scale
* Detect linearly dependent predictors on OLS.
=== 0.18.0 / 2011-01-07
* New Statsample.load_excel
* New Statsample.load_csv
* Statsample::Dataset#[] accepts an array of fields and uses clone
* New Dataset#correlation_matrix and Statsample::Dataset#covariance_matrix
* Statsample::Dataset.filter add labels to vectors
* Principal Components generation complete on PCA (covariance matrix prefered)
* Added note on Statsample::Factor::PCA about erratic signs on eigenvalues,
* Statsample::Factor::PCA.component_matrix calculated different for covariance matrix
* Improved summary for PCA using covariance matrix
* New attribute :label_angle for Statsample::Graph::Boxplot
* Fixed Scatterplots scaling problems
* New attributes for Scatterplots: groups, minimum_x, minimum_y, maximum_x,
* New Statsample::Multiset#union allows to create a new dataset based on a m
* New Statsample::Multiset#each to traverse through datasets
* Bug fix: Vector#standarized and Vector#percentile crash on nil data
* Bug fix: Vector#mean and Vector#sd crash on data without valid values
* Modified methods names on Statsample::Factor::PCA : feature_vector to feature_matrix, data_transformation to principal_components
* Added Statsample::Vector.vector_centered
* Factor::MAP.with_dataset() implemented
* Bug fix: Factor::MAP with correlation matrix with non-real eigenvalues crashes * Added documentation for Graph::Histogram
* Added MPA to Reliability::MultiScaleAnalysis
* Added custom names for returned vectors and datasets
* Updated Spanish translation
* Graph::Histogram updated. Custom x and y max and min, optional normal distribution drawing
* Updated Histogram class, with several new methods compatibles with GSL::Histogram
=== 0.17.0 / 2010-12-09
* Added Statsample::Graph::Histogram and Statsample::Graph::Boxplot
* Added Statsample::Reliability::SkillScaleAnalysis for analysis of skill based scales.
* Deleted combination and permutation classes. Backport for ruby 1.8.7 widely available
* Deleted unused variables (thanks, ruby-head)
=== 0.16.0 / 2010-11-13
* Works on ruby 1.9.2 and HEAD. Updated Rakefile and manifest
* Removed all graph based on Svg::Graph.
* First operative version of Graph with Rubyvis
* Corrected bug on Distribution::Normal.cdf.
* Added reference on references.txt
* Ruby-based random gaussian distribution generator when gsl not available
* Added population average deviation [Al Chou]
=== 0.15.1 / 2010-10-20
* Statsample::Excel and Statsample::PlainText add name to vectors equal to field name
* Statsample::Dataset.delete_vector accept multiple fields.
* Statsample::Dataset.dup_only_valid allows duplication of specific fields
* ScaleAnalysis doesn't crash on one-item scales
* Updated references
=== 0.15.0 / 2010-09-07
* Added class Statsample::Reliability::ICC for calculation of Intra-class correlation (Shrout & Fleiss, 1979; McGraw & Wong, 1996). Tested with SPSS and R values.
* References: Updated and standardized references on many classes. Added grab_references.rb script, to create a list of references for the library
* Added Spearman-Brown prophecy on Reliability module
* Distribution::F uses Gsl when available
* Added mean r.p.b. and item sd on Scale Analysis
* Corrected bug on Vector.ary_method and example of Anova Two Way using vector.
=== 0.14.1 / 2010-08-18
* Added extra information on $DEBUG=true.
* Changed ParallelAnalysis: with_random_data parameters, bootstrap_method options are data and random, resolve bug related to number of factors to preserve, resolved bug related to original eigenvalues, can support failed bootstrap of data for Tetrachoric correlation.
* Optimized eigenpairs on Matrix when GSL is available.
* Added test for parallel analysis using data bootstraping
* Updated .pot and Manifest.txt
* Added test for kmo(global and univariate), bartlett and anti-image. Kmo and Bartlett have test based on Dziuban and Shirkey with correct results
* Complete set of test to test if a correlation matrix is appropriate for factor analysis: test of sphericity, KMO and anti-image (see Dziuban and Shirkey, 1974)
* Updated Parallel Analysis to work on Principal Axis Analysis based on O'Connors formulae
* Added reference for Statsample::Factor::MAP
=== 0.14.0 / 2010-08-16
* Added Statsample::Factor::MAP, to execute Velicer's (1976) MAP to determine the number of factors to retain on EFA
* Bug fix on test suite on Ruby 1.8.7
* Horn's Parallel Analysis operational and tested for pure random data
* Fixed bug on Excel writer on Ruby1.9 (frozen string on header raises an error).
* Extra information on Factorial Analysis on summaries
* Fixed bug on Factor::Rotation when used ::Matrix without field method.
* Added Vector#vector_percentil method
* Summaries for PCA, Rotation, MultiScale and ScaleAnalysis created or improved.
* Factor::PCA could have rotation and parallel analysis on summary.
* Cronbach's alpha from covariance matrix raise an error on size<2
* MultiScaleAnalysis could have Parallel Analysis on summary.
* Added Chi Square test
* Added new information on README.txt
=== 0.13.1 / 2010-07-03
* Rserve extensions for dataset and vector operational
* On x86_64, variance from gsl is not exactly equal to sum of variance-covariance on Statsample::Reliability::Scale, but in delta 1e-10
* Updated README.txt
* Reliability::ScaleAnalysis uses covariance matrix for 'if deleted' calculations to optimize memory and speed. Test for 'if deleted' statistics
* More strings translated. Added dependency on tetrachoric on parallel analysis
=== 0.13.0 / 2010-06-13
* Polychoric and Tetrachoric moved to gem statsample-bivariate-extension
* All classes left with summary method include Summarizable now. Every method which return localizable string is now parsed with _()
* Correct implementation of Reliability::MultiScaleAnalysis.
* Spanish translation for Mann-Whitney's U
* Added example for Mann-Whitney's U test
* Better summary for Mann-Whitney's U Test
* Added Statsample::Bivariate::Pearson class to retrieve complete analysis for r correlations
* Bug fix on DominanceAnalysis::Bootstrap
=== 0.12.0 / 2010-06-09
* Modified Rakefile to remove dependencies based on C extensions. These are moved to statsample-optimization
* T test with unequal variance fixed on i686
* API Change: Renamed Reliability::ItemAnalysis and moved to independent file
* New Reliability::MultiScaleAnalysis for easy analysis of scales on a same survey, including reliability, correlation matrix and Factor Analysis
* Updated README to reflect changes on Reliability module
* SvgGraph works with reportbuilder.
* Added methods on Polychoric based on Olsson(1979): the idea is estimate using second derivatives.
* Distribution test changed (reduced precision on 32-bit systems)
=== 0.11.2 / 2010-05-05
* Updated dependency for 'extendedmatrix' to 0.2 (Matrix#build method)
=== 0.11.1 / 2010-05-04
* Removed Matrix almost all Matrix extensions and replaced by dependency on 'extendmatrix' gem
* Added dependency to gsl >=1.12.109. Polychoric with joint method fails without this explicit dependency
=== 0.11.0 / 2010-04-16
<b>New features:</b>
* Added Statsample::Anova::TwoWay and Statsample::Anova::TwoWayWithVectors
* Added Statsample.clone_only_valid and Statsample::Dataset.clone_only_valid, for cheap copy on already clean vectors
<b>Optimizations and bug fix</b>
* Removed library statistics2 from package. Used gem statistics2 instead, because have a extension version
* Added example for Reliability class
* Bug fix on Statsample::DominanceAnalysis
=== 0.10.0 / 2010-04-13
<b>API modifications</b>
* Refactoring of Statsample::Anova module.
* Statsample::Anova::OneWay :implementation of generic ANOVA One-Way, used by Multiple Regression, for example.
* Statsample::Anova::OneWayWithVectors: implementation of ANOVA One-Way to test differences of means.
<b>New features</b>
* New Statsample::Factor::Parallel Analysis, to performs Horn's 'parallel analysis' to a PCA, to adjust for sample bias on retention of components.
* New Statsample.only_valid_clone and Statsample::Dataset.clone, which allow creating shallow copies of valid vectors and datasets. Used by correlation matrix methods to optimize calculations
* New module Statsample::Summarizable, which add GetText and ReportBuilder support to classes. Better summaries for Vector, Dataset, Crosstab, PrincipalAxis, PCA and Regression::Multiple classes
<b>Optimizations and bug fix</b>
* Refactoring of Statsample::Regression::Multiple classes. Still needs works
* Bug fix on Statsample::Factor::PCA and Statsample::Factor::PrincipalAxis
* Bug fix on Statsample::Bivariate::Polychoric.new_with_vectors. Should be defined class method, no instance method.
* Optimized correlation and covariance matrix. Only calculates the half of matrix and the other half is returned from cache
* More tests coverage. RCOV Total: 82.51% , Code: 77.83%
=== 0.9.0 / 2010-04-04
* New Statsample::Test::F. Anova::OneWay subclasses it and Regression classes uses it.
=== 0.8.2 / 2010-04-01
* Statsample::PromiseAfter replaced by external package DirtyMemoize [http://rubygems.org/gems/dirty-memoize]
=== 0.8.1 / 2010-03-29
* Fixed Regression summaries
=== 0.8.0 / 2010-03-29
* New Statsample::Test::T module, with classes and methods to do Student's t tests for one and two samples.
* Statsample::PromiseAfter module to set a number of variables without explicitly calling the compute or iterate method
* All tests ported to MiniUnit
* Directory 'demo' renamed to 'examples'
* Bug fix on report_building on Statsample::Regression::Multiple classes
=== 0.7.0 / 2010-03-25
* Ported to ReportBuilder 1.x series
* Implementation of ruby based covariance and correlation changed to a clearer code
* Statsample::Vector#svggraph_frequencies accepts IO
* Some test ported to Miniunit
* CSV on Ruby1.8 uses FasterCSV
=== 0.6.7 / 2010-03-23
* Bug fix: dependency on ReportBuilder should be set to "~>0.2.0", not "0.2"
=== 0.6.6 / 2010-03-22
* Set ReportBuilder dependency to '0.2.~' version, because future API break
* Removed Alglib dependency
* Factor::PrincipalAxis and Factor::PCA reworked
* Standardization of documentation on almost every file
* New Statsample::Test::Levene, to test equality of variances
* Constant HAS_GSL replaced by Statsample.has_gsl?
* PCA and Principal Axis test based on R and SPSS results
* Bug fix on test_dataset.rb / test_saveload
* Added Rakefile
* Demos for levene, Principal Axis
=== 0.6.5 / 2010-02-24
* Bug fix on test: Use tempfile instead of tempdir
* Multiple Regression: Calculation of constant standard error , using covariance matrix.
* Calculation of R^2_yx and P^2_yx for Regresion on Multiple Dependents variables
* Dominance Analysis could use Correlation or Covariance Matrix as input.
* Dominance Analysis extension to multiple dependent variables (Azen & Budescu, 2006)
* Two-step estimate of Polychoric correlation uses minimization gem, so could be executed without rb-gsl
=== 0.6.4 / 2010-02-19
* Dominance Analysis and Dominance Analysis Bootstrap allows multivariate dependent analysis.
* Test suite for Dominance Analysis, using Azen and Budescu papers as references
* X^2 for polychoric correlation
=== 0.6.3 / 2010-02-15
* Statsample::Bivariate::Polychoric have joint estimation.
* Some extra documentation and bug fixs
=== 0.6.2 / 2010-02-11
* New Statsample::Bivariate::Polychoric. For implement: X2 and G2
* New matrix.rb, for faster development of Contingence Tables and Correlation Matrix
=== 0.6.1 / 2010-02-08
* Bug fix on DominanceAnalysis summary for Ruby1.9
* Some extra documentation
=== 0.6.0 / 2010-02-05
* New Statsample::Factor module. Include classes for extracting factors (Statsample::Factor::PCA and Statsample::Factor::PrincipalAxis) and rotate component matrix ( Statsample::Factor::Rotation subclasses). For now, only orthogonal rotations
* New Statsample::Dataset.crosstab_with_asignation, Statsample::Dataset.one_to_many
* New class Statsample::Permutation to produce permutations of a given array
* New class Statsample::Histogram, with same interface as GSL one
* New class Statsample::Test::UMannWhitney, to perform Mann-Whitney's U test. Gives z based and exact calculation of probability
* Improved support for ReportBuilder
* Statsample::Codification module reworked
* Fixed bugs on Dominance Analysis classes
* Fixed bugs on Statsample::Vector.kurtosis and Statsample::Vector.skew
=== 0.5.1 / 2009-10-06
* New class Statsample::Bivariate::Tetrachoric, for calculation of tetrachoric correlations. See http://www.john-uebersax.com/stat/tetra.htm for information.
* New Statsample::Dataset.merge
* New Statsample::Vector.dichotomize
* New ItemReliability.item_difficulty_analysis
* New module Statsample::SPSS, to export information to SPSS. For now, only tetrachoric correlation matrix are provided
* All SpreadSheet based importers now accept repeated variable names and rename them on the fly
* MultipleRegression::BaseEngine moved to new file
* Bug fix for MultipleRegression::GslEngine checks for Alglib, not GSL
=== 0.5.0 / 2009-09-26
* Vector now uses a Hash as a third argument
* Tested on Ruby 1.8.6, 1.8.7 and 1.9.1 with multiruby
=== 0.4.1 / 2009-09-12
* More methods and usage documentation
* Logit tests
* Bug fix: rescue for requires doesn't specify LoadError
* Binomial::BaseEngine new methods: coeffs_se, coeffs, constant and constant_se
=== 0.4.0 / 2009-09-10
* New Distribution module, based on statistics2.rb by Shin-ichiro HARA. Replaces all instances of GSL distributions pdf and cdf calculations for native calculation.
* New Maximum Likelihood Estimation for Logit, Probit and Normal Distribution using Von Tessin(2005) algorithm. See MLE class and subclasses for more information.
* New Binomial regression subclasses (Logit and Probit), using MLE class
* Added tests for gsl, Distribution, MLE and Logit
* Bug fix on svggraph.rb. Added check_type for scale graphics
* Bug fix on gdchart. Replaced old Nominal, Ordinal and Scale for Vector
=== 0.3.4 / 2009-08-21
* Works with statsample-optimization 2.0.0
* Vector doesn't uses delegation. All methods are part of Vector
* Added Combination. Generates all combination of n elements taken r at a time
* Bivariate#prop_pearson now can uses as a second parameter :both, :left, :right, :positive or :negative
* Added LICENSE.txt
=== 0.3.3 / 2009-08-11
* Added i18n support. For now, only spanish translation available
* Bug fix: Test now load libraries on ../lib path
* Excel and CSV importers automatically modify type of vector to Scale when all data are numbers or nils values
=== 0.3.2 / 2009-08-04
* Added Regression::Multiple::GslEngine
* Added setup.rb
* Crosstab#row_label and #column_name
* DominanceAnalysis and DominanceAnalysisBootstrap uses Dataset#labels for Vector names.
=== 0.3.1 / 2009-08-03
* Name and logic of Regression classes changed. Now, you have Regression::Simple class and Regression::Multiple module with two engines: RubyEngine and AlglibEngne
* New Crosstab#summary
=== 0.3.0 / 2009-08-02
* RubySS renamed to Statsample
* Optimization extension goes to another gem: ruby-statsample-optimization
=== 0.2.0 / 2009-08-01
* One Way Anova on Statsample::Anova::OneWay
* Dominance Analysis!!!! The one and only reason to develop a Multiple Regression on pure ruby.
* Multiple Regression on Multiple Regression module. Pairwise (pure ruby) or MultipleRegressionPairwise and Listwise (optimized) on MultipleRegressionAlglib and
* New Dataset#to_gsl_matrix, #from_to,#[..],#bootstrap,#vector_missing_values, #vector_count_characters, #each_with_index, #collect_with_index
* New Vector#box_cox_transformation
* Module Correlation renamed to Bivariate
* Some fancy methods and classes to create Summaries
* Some documentation about Algorithm used on doc_latex
* Deleted 'distributions' extension. Ruby/GSL has all the pdf and cdf you ever need.
* Tests work without any dependency. Only nags about missing deps.
* Test for MultipleRegression, Anova, Excel, Bivariate.correlation_matrix and many others
=== 0.1.9 / 2009-05-22
* Class Vector: new methods vector_standarized_pop, []=, min, max
* Class Dataset: global variable $RUBY_SS_ROW stores the row number on each() and related methods. dup() with argument returns a copy of the dataset only for given fields. New methods: standarize, vector_mean, collect, verify,collect_matrix
* Module Correlation: new methods covariance, t_pearson, t_r, prop_pearson, covariance_matrix, correlation_matrix, correlation_probability_matrix
* Module SRS: New methods estimation_n0 and estimation_n
* Module Reliability: new ItemCharacteristicCurve class
* New HtmlReport class
* New experimental SPSS Class.
* Converters: Module CSV with new options. Added write() method for GGobi module
* New Mx exporter (http://www.vcu.edu/mx/)
* Class SimpleRegression: new methods standard error
* Added tests for regression and reliability, Vector#vector_mean, Dataset#dup (partial) and Dataset#verify
=== 0.1.8 / 2008-12-10
* Added Regression and Reliability modules
* Class Vector: added methods vector_standarized, recode, inspect, ranked
* Class Dataset: added methods vector_by_calculation, vector_sum, filter_field
* Module Correlation: added methods like spearman, point biserial and tau-b
* Added tests for Vector#ranked, Vector#vector_standarized, Vector#sum_of_squared_deviation, Dataset#vector_by_calculation, Dataset#vector_sum, Dataset#filter_field and various test for Correlation module
* Added demos: item_analysis and sample_test
=== 0.1.7 / 2008-10-1
* New module for codification
* ...
=== 0.1.6 / 2008-09-26
* New modules for SRS and stratified sampling
* Statsample::Database for read and write onto databases.
You could use Database and CSV on-tandem for mass-editing and reimport
of databases
=== 0.1.5 / 2008-08-29
* New extension statsampleopt for optimizing some functions on Statsample submodules
* New submodules Correlation and Test
=== 0.1.4 / 2008-08-27
* New extension, with cdf functions for
chi-square, t, gamma and normal distributions.
Based on dcdflib (http://www.netlib.org/random/)
Also, has a function to calculate the tail for a noncentral T distribution
=== 0.1.3 / 2008-08-22
* Operational versions of Vector, Dataset, Crosstab and Resample
* Read and write CSV files
* Calculate chi-square for 2 matrixes
=== 0.1.1 - 0.1.2 / 2008-08-18
* Included several methods on Ruby::Type classes
* Organized dirs with sow
=== 0.1.0 / 2008-08-12
* First version.
================================================
FILE: LICENSE.txt
================================================
Copyright (c) 2009-2014, Claudio Bustos
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: Manifest.txt
================================================
.travis.yml
Gemfile
Gemfile.lock
History.txt
LICENSE.txt
Manifest.txt
README.md
Rakefile
benchmarks/correlation_matrix_15_variables.rb
benchmarks/correlation_matrix_5_variables.rb
benchmarks/correlation_matrix_methods/correlation_matrix.ds
benchmarks/correlation_matrix_methods/correlation_matrix.html
benchmarks/correlation_matrix_methods/correlation_matrix.rb
benchmarks/correlation_matrix_methods/correlation_matrix.xls
benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods
benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods
benchmarks/correlation_matrix_methods/results.ds
benchmarks/factor_map.rb
benchmarks/helpers_benchmark.rb
data/locale/es/LC_MESSAGES/statsample.mo
doc_latex/manual/equations.tex
examples/boxplot.rb
examples/correlation_matrix.rb
examples/dataset.rb
examples/dominance_analysis.rb
examples/dominance_analysis_bootstrap.rb
examples/histogram.rb
examples/icc.rb
examples/levene.rb
examples/multiple_regression.rb
examples/multivariate_correlation.rb
examples/parallel_analysis.rb
examples/polychoric.rb
examples/principal_axis.rb
examples/reliability.rb
examples/scatterplot.rb
examples/t_test.rb
examples/tetrachoric.rb
examples/u_test.rb
examples/vector.rb
examples/velicer_map_test.rb
grab_references.rb
lib/spss.rb
lib/statsample.rb
lib/statsample/analysis.rb
lib/statsample/analysis/suite.rb
lib/statsample/analysis/suitereportbuilder.rb
lib/statsample/anova.rb
lib/statsample/anova/contrast.rb
lib/statsample/anova/oneway.rb
lib/statsample/anova/twoway.rb
lib/statsample/bivariate.rb
lib/statsample/bivariate/pearson.rb
lib/statsample/codification.rb
lib/statsample/converter/csv.rb
lib/statsample/converter/spss.rb
lib/statsample/converters.rb
lib/statsample/crosstab.rb
lib/statsample/dataset.rb
lib/statsample/dominanceanalysis.rb
lib/statsample/dominanceanalysis/bootstrap.rb
lib/statsample/factor.rb
lib/statsample/factor/map.rb
lib/statsample/factor/parallelanalysis.rb
lib/statsample/factor/pca.rb
lib/statsample/factor/principalaxis.rb
lib/statsample/factor/rotation.rb
lib/statsample/graph.rb
lib/statsample/graph/boxplot.rb
lib/statsample/graph/histogram.rb
lib/statsample/graph/scatterplot.rb
lib/statsample/histogram.rb
lib/statsample/matrix.rb
lib/statsample/multiset.rb
lib/statsample/regression.rb
lib/statsample/regression/multiple.rb
lib/statsample/regression/multiple/alglibengine.rb
lib/statsample/regression/multiple/baseengine.rb
lib/statsample/regression/multiple/gslengine.rb
lib/statsample/regression/multiple/matrixengine.rb
lib/statsample/regression/multiple/rubyengine.rb
lib/statsample/regression/simple.rb
lib/statsample/reliability.rb
lib/statsample/reliability/icc.rb
lib/statsample/reliability/multiscaleanalysis.rb
lib/statsample/reliability/scaleanalysis.rb
lib/statsample/reliability/skillscaleanalysis.rb
lib/statsample/resample.rb
lib/statsample/rserve_extension.rb
lib/statsample/shorthand.rb
lib/statsample/srs.rb
lib/statsample/test.rb
lib/statsample/test/bartlettsphericity.rb
lib/statsample/test/chisquare.rb
lib/statsample/test/f.rb
lib/statsample/test/kolmogorovsmirnov.rb
lib/statsample/test/levene.rb
lib/statsample/test/t.rb
lib/statsample/test/umannwhitney.rb
lib/statsample/test/wilcoxonsignedrank.rb
lib/statsample/vector.rb
lib/statsample/vector/gsl.rb
lib/statsample/version.rb
po/es/statsample.mo
po/es/statsample.po
po/statsample.pot
references.txt
setup.rb
test/fixtures/bank2.dat
test/fixtures/correlation_matrix.rb
test/fixtures/hartman_23.matrix
test/fixtures/repeated_fields.csv
test/fixtures/stock_data.csv
test/fixtures/test_csv.csv
test/fixtures/test_xls.xls
test/fixtures/tetmat_matrix.txt
test/fixtures/tetmat_test.txt
test/helpers_tests.rb
test/test_analysis.rb
test/test_anova_contrast.rb
test/test_anovaoneway.rb
test/test_anovatwoway.rb
test/test_anovatwowaywithdataset.rb
test/test_anovawithvectors.rb
test/test_bartlettsphericity.rb
test/test_bivariate.rb
test/test_codification.rb
test/test_crosstab.rb
test/test_csv.rb
test/test_dataset.rb
test/test_dominance_analysis.rb
test/test_factor.rb
test/test_factor_map.rb
test/test_factor_pa.rb
test/test_ggobi.rb
test/test_gsl.rb
test/test_histogram.rb
test/test_matrix.rb
test/test_multiset.rb
test/test_regression.rb
test/test_reliability.rb
test/test_reliability_icc.rb
test/test_reliability_skillscale.rb
test/test_resample.rb
test/test_rserve_extension.rb
test/test_srs.rb
test/test_statistics.rb
test/test_stest.rb
test/test_stratified.rb
test/test_test_f.rb
test/test_test_kolmogorovsmirnov.rb
test/test_test_t.rb
test/test_umannwhitney.rb
test/test_vector.rb
test/test_wilcoxonsignedrank.rb
test/test_xls.rb
web/Rakefile
================================================
FILE: README.md
================================================
# Statsample
Homepage :: https://github.com/sciruby/statsample
[](https://travis-ci.org/clbustos/statsample)
[](http://badge.fury.io/rb/statsample)
## DESCRIPTION
A suite for basic and advanced statistics on Ruby. Tested on Ruby 2.1.1p76 (June 2014), 1.8.7, 1.9.1, 1.9.2 (April, 2010), ruby-head(June, 2011) and JRuby 1.4 (Ruby 1.8.7 compatible).
Include:
* Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
* Imports and exports datasets from and to Excel, CSV and plain text files.
* Correlations: Pearson's r, Spearman's rank correlation (rho), point biserial, tau a, tau b and gamma. Tetrachoric and Polychoric correlation provided by the +statsample-bivariate-extension+ gem.
* Intra-class correlation
* Anova: generic and vector-based One-way ANOVA and Two-way ANOVA, with contrasts for One-way ANOVA.
* Tests: F, T, Levene, U-Mannwhitney.
* Regression: Simple, Multiple (OLS), Probit and Logit
* Factorial Analysis: Extraction (PCA and Principal Axis), Rotation (Varimax, Equimax, Quartimax) and Parallel Analysis and Velicer's MAP test, for estimation of number of factors.
* Reliability analysis for simple scale and a DSL to easily analyze multiple scales using factor analysis and correlations, if you want it.
* Basic time series support
* Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
* Sample calculation related formulas
* Structural Equation Modeling (SEM), using R libraries +sem+ and +OpenMx+
* Creates reports on text, html and rtf, using ReportBuilder gem
* Graphics: Histogram, Boxplot and Scatterplot
## Principles
* Software Design:
* One module/class for each type of analysis
* Options can be set as hash on initialize() or as setters methods
* Clean API for interactive sessions
* summary() returns all necessary information for interactive sessions
* All statistical data available through methods on objects
* All (important) methods should be tested. Better with random data.
* Statistical Design
* Results are tested against text results, SPSS and R outputs.
* Go beyond Null Hypothesis Testing, using confidence intervals and effect sizes when possible
* (When possible) All references for methods are documented, providing sensible information on documentation
## Features
* Classes for manipulation and storage of data:
* Statsample::Vector: An extension of an array, with statistical methods like sum, mean and standard deviation
* Statsample::Dataset: a group of Statsample::Vector, analogous to an Excel spreadsheet or a data frame in R. The base of almost all operations on statsample.
* Statsample::Multiset: multiple datasets with same fields and type of vectors
* Anova module provides generic Statsample::Anova::OneWay and vector based Statsample::Anova::OneWayWithVectors. Also you can create contrast using Statsample::Anova::Contrast
* Module Statsample::Bivariate provides covariance and pearson, spearman, point biserial, tau a, tau b, gamma, tetrachoric (see Bivariate::Tetrachoric) and polychoric (see Bivariate::Polychoric) correlations. Include methods to create correlation and covariance matrices
* Multiple types of regression.
* Simple Regression : Statsample::Regression::Simple
* Multiple Regression: Statsample::Regression::Multiple
* Logit Regression: Statsample::Regression::Binomial::Logit
* Probit Regression: Statsample::Regression::Binomial::Probit
* Factorial Analysis algorithms on Statsample::Factor module.
* Classes for Extraction of factors:
* Statsample::Factor::PCA
* Statsample::Factor::PrincipalAxis
* Classes for Rotation of factors:
* Statsample::Factor::Varimax
* Statsample::Factor::Equimax
* Statsample::Factor::Quartimax
* Classes for calculation of factors to retain
* Statsample::Factor::ParallelAnalysis performs Horn's 'parallel analysis' to a principal components analysis to adjust for sample bias in the retention of components.
* Statsample::Factor::MAP performs Velicer's Minimum Average Partial (MAP) test, which retain components as long as the variance in the correlation matrix represents systematic variance.
* Dominance Analysis. Based on Budescu and Azen papers, dominance analysis is a method to analyze the relative importance of one predictor relative to another on multiple regression
* Statsample::DominanceAnalysis class can report dominance analysis for a sample, using uni or multivariate dependent variables
* Statsample::DominanceAnalysis::Bootstrap can execute bootstrap analysis to determine dominance stability, as recommended by Azen & Budescu (2003) link[http://psycnet.apa.org/journals/met/8/2/129/].
* Module Statsample::Codification, to help to codify open questions
* Converters to import and export data:
* Statsample::Database : Can create sql to create tables, read and insert data
* Statsample::CSV : Read and write CSV files
* Statsample::Excel : Read and write Excel files
* Statsample::Mx : Write Mx Files
* Statsample::GGobi : Write Ggobi files
* Module Statsample::Crosstab provides function to create crosstab for categorical data
* Module Statsample::Reliability provides functions to analyze scales with psychometric methods.
* Class Statsample::Reliability::ScaleAnalysis provides statistics like mean, standard deviation for a scale, Cronbach's alpha and standardized Cronbach's alpha, and for each item: mean, correlation with total scale, mean if deleted, Cronbach's alpha if deleted.
* Class Statsample::Reliability::MultiScaleAnalysis provides a DSL to easily analyze reliability of multiple scales and retrieve correlation matrix and factor analysis of them.
* Class Statsample::Reliability::ICC provides intra-class correlation, using Shrout & Fleiss(1979) and McGraw & Wong (1996) formulations.
* Module Statsample::SRS (Simple Random Sampling) provides a lot of functions to estimate standard error for several type of samples
* Module Statsample::Test provides several methods and classes to perform inferential statistics
* Statsample::Test::BartlettSphericity
* Statsample::Test::ChiSquare
* Statsample::Test::F
* Statsample::Test::KolmogorovSmirnov (only D value)
* Statsample::Test::Levene
* Statsample::Test::UMannWhitney
* Statsample::Test::T
* Statsample::Test::WilcoxonSignedRank
* Module Graph provides several classes to create beautiful graphs using rubyvis
* Statsample::Graph::Boxplot
* Statsample::Graph::Histogram
* Statsample::Graph::Scatterplot
* Gem <tt>bio-statsample-timeseries</tt> provides module Statsample::TimeSeries with support for time series, including ARIMA estimation using Kalman-Filter.
* Gem <tt>statsample-sem</tt> provides a DSL to R libraries +sem+ and +OpenMx+
* Gem <tt>statsample-glm</tt> provides you with the GLM method, to work with Logistic, Poisson and Gaussian regression, using ML or IRWLS.
* Close integration with gem <tt>reportbuilder</tt>, to easily create reports on text, html and rtf formats.
# Examples of use:
See the [examples folder](https://github.com/clbustos/statsample/tree/master/examples/) too.
## Boxplot
```ruby
require 'statsample'
ss_analysis(Statsample::Graph::Boxplot) do
n=30
a=rnorm(n-1,50,10)
b=rnorm(n, 30,5)
c=rnorm(n,5,1)
a.push(2)
boxplot(:vectors=>[a,b,c], :width=>300, :height=>300, :groups=>%w{first first second}, :minimum=>0)
end
Statsample::Analysis.run # Open svg file on *nix application defined
```
## Correlation matrix
```ruby
require 'statsample'
# Note R like generation of random gaussian variable
# and correlation matrix
ss_analysis("Statsample::Bivariate.correlation_matrix") do
samples=1000
ds=data_frame(
'a'=>rnorm(samples),
'b'=>rnorm(samples),
'c'=>rnorm(samples),
'd'=>rnorm(samples))
cm=cor(ds)
summary(cm)
end
Statsample::Analysis.run_batch # Echo output to console
```
# Requirements
Optional:
* Plotting: gnuplot and rbgnuplot, SVG::Graph
* Factorial analysis and polychorical correlation(joint estimate and polychoric series): gsl library and rb-gsl (https://rubygems.org/gems/rb-gsl/). You should install it using <tt>gem install rb-gsl</tt>.
*Note*: Use gsl 1.12.109 or later.
# Resources
* Source code on github :: http://github.com/clbustos/statsample
* Docs :: http://statsample.apsique.cl/
* Bug report and feature request :: http://github.com/clbustos/statsample/issues
* E-mailing list :: http://groups.google.com/group/statsample
# Installation
```bash
$ sudo gem install statsample
```
On *nix, you should install statsample-optimization to retrieve gems gsl, statistics2 and a C extension to speed some methods.
Precompiled versions are available for Ruby 1.9 on x86, x86_64 and mingw32 archs.
```bash
$ sudo gem install statsample-optimization
```
If you use Ruby 1.8, you should compile statsample-optimization, using parameter <tt>--platform ruby</tt>
```bash
$ sudo gem install statsample-optimization --platform ruby
```
If you need to work on Structural Equation Modeling, you could see +statsample-sem+. You need R with +sem+ or +OpenMx+ [http://openmx.psyc.virginia.edu/] libraries installed
```bash
$ sudo gem install statsample-sem
```
Available setup.rb file
```bash
sudo ruby setup.rb
```
## License
BSD-3 (See LICENSE.txt)
Could change between version, without previous warning. If you want a specific license, just choose the version that you need.
================================================
FILE: Rakefile
================================================
#!/usr/bin/ruby
# -*- ruby -*-
# -*- coding: utf-8 -*-
# Rakefile for statsample: lint, release, gettext and documentation
# tasks, plus the Hoe gem specification.
$:.unshift(File.dirname(__FILE__)+'/lib/')
require 'rubygems'
require 'statsample'
require 'hoe'
require 'rdoc'

Hoe.plugin :git
Hoe.plugin :doofus

desc "Ruby Lint"
task :lint do
  # RbConfig::CONFIG replaces Config::CONFIG, which was removed in Ruby 2.0.
  executable = RbConfig::CONFIG['RUBY_INSTALL_NAME']
  Dir.glob("lib/**/*.rb") {|f|
    # `ruby -w -c` syntax-checks each library file with warnings enabled.
    if !system %{#{executable} -w -c "#{f}"}
      puts "Error on: #{f}"
    end
  }
end

task :release do
  system %{git push origin master}
end

task "clobber_docs" do
  # Only to omit warnings
end

desc "Update pot/po files."
task "gettext:updatepo" do
  require 'gettext/tools'
  GetText.update_pofiles("statsample", Dir.glob("{lib,bin}/**/*.{rb,rhtml}"), "statsample #{Statsample::VERSION}")
end

desc "Create mo-files"
task "gettext:makemo" do
  require 'gettext/tools'
  GetText.create_mofiles()
  # GetText.create_mofiles(true, "po", "locale")  # This is for "Ruby on Rails".
end

# Gem specification, managed through Hoe.
h = Hoe.spec('statsample') do
  self.version = Statsample::VERSION
  self.urls = ["https://github.com/clbustos/statsample"]
  #self.testlib=:minitest
  self.readme_file = 'README.md'
  self.urls = ['https://github.com/clbustos/statsample']
  self.developer('Claudio Bustos', 'clbustos@gmail.com')
  self.extra_deps << ["spreadsheet","~>0.6"] << ["reportbuilder", "~>1.4"] << ["minimization", "~>0.2.0"] << ["fastercsv", ">0"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.3.1"] << ["statsample-bivariate-extension", ">0"] << ["rserve-client"] << ["rubyvis"] << ["distribution"]
  self.extra_dev_deps << ["hoe","~>0"] << ["shoulda","~>3"] << ["minitest", "~>2"] << ["gettext", "~>0"] << ["mocha", "~>0"] << ["hoe-git", "~>0"]
  self.clean_globs << "test/images/*" << "demo/item_analysis/*" << "demo/Regression"
  self.post_install_message = <<-EOF
***************************************************
Thanks for installing statsample.
On *nix, you could install statsample-optimization
to retrieve gems gsl, statistics2 and a C extension
to speed some methods.
$ sudo gem install statsample-optimization
On Ubuntu, install build-essential and libgsl0-dev
using apt-get. Compile ruby 1.8 or 1.9 from
source code first.
$ sudo apt-get install build-essential libgsl0-dev
*****************************************************
EOF
  self.need_rdoc = false
end

if Rake.const_defined?(:RDocTask)
  Rake::RDocTask.new(:docs) do |rd|
    rd.main = h.readme_file
    rd.options << '-d' if (`which dot` =~ /\/dot/) unless
      ENV['NODOT'] || Hoe::WINDOZE
    rd.rdoc_dir = 'doc'
    rd.rdoc_files.include("lib/**/*.rb")
    rd.rdoc_files += h.spec.extra_rdoc_files
    rd.rdoc_files.reject! {|f| f == "Manifest.txt"}
    title = h.spec.rdoc_options.grep(/^(-t|--title)=?$/).first
    if title then
      rd.options << title
      unless title =~ /\=/ then # for ['-t', 'title here']
        # Fixed: the original referenced a bare `spec`, which is undefined
        # in this scope; the Hoe spec is reachable only through `h`.
        title_index = h.spec.rdoc_options.index(title)
        rd.options << h.spec.rdoc_options[title_index + 1]
      end
    else
      title = "#{h.name}-#{h.version} Documentation"
      title = "#{h.rubyforge_name}'s " + title if h.rubyforge_name != h.name
      rd.options << '--title' << title
    end
  end
end

desc 'Publish rdocs with analytics support'
task :publicar_docs => [:clean] do
  # ruby %{agregar_adsense_a_doc.rb}
  # Reads deploy host/user/dir from ./doc.yaml, then rsyncs the rdoc tree.
  path = File.expand_path("./doc.yaml")
  config = YAML.load(File.read(path))
  host = "#{config["user"]}@#{config["host"]}"
  remote_dir = config["dir"]
  local_dir = h.local_rdoc_dir
  Dir.glob(local_dir+"/**/*") {|file|
    sh %{chmod 755 #{file}}
  }
  sh %{rsync #{h.rsync_args} #{local_dir}/ #{host}:#{remote_dir}}
end
# vim: syntax=Ruby
================================================
FILE: benchmarks/correlation_matrix_15_variables.rb
================================================
# Benchmark: GSL matrix-algebra vs. pairwise Ruby computation of a
# correlation matrix.
# NOTE(review): the filename says 15 variables but `vars` is 20 — confirm
# which one is intended.
require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))
extend BenchPress

cases = 250
vars  = 20

name "gsl matrix based vs. manual ruby correlation matrix (#{vars} vars, #{cases} cases)"
author 'Clbustos'
date '2011-01-18'
summary "
A correlation matrix could be constructed using matrix algebra or
mannualy, calculating covariances, means and sd for each pair of vectors.
In this test, we test the calculation using #{vars} variables with
#{cases} cases on each vector
"
reps 200 #number of repetitions

# Dataset of `vars` uniform-random vectors, `cases` cases each.
ds = vars.times.each_with_object({}) { |v, acc|
  acc["x#{v}"] = Statsample::Vector.new_scale(cases) { rand() }
}.to_dataset

measure "Statsample::Bivariate.correlation_matrix_optimized" do
  Statsample::Bivariate.correlation_matrix_optimized(ds)
end

measure "Statsample::Bivariate.correlation_matrix_pairwise" do
  Statsample::Bivariate.correlation_matrix_pairwise(ds)
end
================================================
FILE: benchmarks/correlation_matrix_5_variables.rb
================================================
# Benchmark: GSL matrix-algebra vs. pairwise Ruby computation of a
# correlation matrix, on 5 variables with 500 cases each.
require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))
extend BenchPress

cases = 500
vars  = 5

name "gsl matrix based vs. manual ruby correlation matrix (#{vars} vars, #{cases} cases)"
author 'Clbustos'
date '2011-01-18'
summary "
A correlation matrix could be constructed using matrix algebra or
mannualy, calculating covariances, means and sd for each pair of vectors.
In this test, we test the calculation using #{vars} variables with
#{cases} cases on each vector
"
reps 200 #number of repetitions

# Dataset of `vars` uniform-random vectors, `cases` cases each.
ds = vars.times.each_with_object({}) { |v, acc|
  acc["x#{v}"] = Statsample::Vector.new_scale(cases) { rand() }
}.to_dataset

measure "Statsample::Bivariate.correlation_matrix_optimized" do
  Statsample::Bivariate.correlation_matrix_optimized(ds)
end

measure "Statsample::Bivariate.correlation_matrix_pairwise" do
  Statsample::Bivariate.correlation_matrix_pairwise(ds)
end
================================================
FILE: benchmarks/correlation_matrix_methods/correlation_matrix.html
================================================
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" >
<title>Correlation matrix analysis</title>
<style>
body {
margin:0;
padding:1em;
}
table {
border-collapse: collapse;
}
table td {
border: 1px solid black;
}
.section {
margin:0.5em;
}
</style>
</head><body>
<h1>Correlation matrix analysis</h1><div id='toc'><div class='title'>List of contents</div>
<ul>
<li><a href='#toc_1'>Multiple regression of cases,vars,c_v on time_optimized</a></li>
<ul>
<li><a href='#toc_2'>ANOVA</a></li>
</ul>
<li><a href='#toc_3'>Multiple regression of cases,vars,c_v on time_pairwise</a></li>
<ul>
<li><a href='#toc_4'>ANOVA</a></li>
</ul>
</ul>
</div>
<div class='tot'><div class='title'>List of tables</div><ul><li><a href='#table_1'>ANOVA Table</a></li><li><a href='#table_2'>Beta coefficients</a></li><li><a href='#table_3'>ANOVA Table</a></li><li><a href='#table_4'>Beta coefficients</a></li></ul></div>
<div class='section'><h2>Multiple regression of cases,vars,c_v on time_optimized</h2><a name='toc_1'></a>
<p>Engine: Statsample::Regression::Multiple::RubyEngine</p>
<p>Cases(listwise)=63(63)</p>
<p>R=0.978844</p>
<p>R^2=0.958137</p>
<p>R^2 Adj=0.956008</p>
<p>Std.Error R=3.092024</p>
<p>Equation=4.031667 + 0.018039cases + 0.244790vars + 0.001197c_v</p>
<div class='section'><h3>ANOVA</h3><a name='toc_2'></a>
<a name='table_1'></a><table><caption>ANOVA Table</caption><thead><th>source</th><th>ss</th><th>df</th><th>ms</th><th>f</th><th>p</th></thead>
<tbody>
<tr><td>Regression</td><td>12910.098</td><td>3</td><td>4303.366</td><td>450.114</td><td>0.000</td></tr>
<tr><td>Error</td><td>564.076</td><td>59</td><td>9.561</td><td></td><td></td></tr>
<tr><td>Total</td><td>13474.174</td><td>62</td><td>4312.927</td><td></td><td></td></tr>
</tbody>
</table>
</div>
<a name='table_2'></a><table><caption>Beta coefficients</caption><thead><th>coeff</th><th>b</th><th>beta</th><th>se</th><th>t</th></thead>
<tbody>
<tr><td>Constant</td><td>4.031667</td><td>-</td><td>0.752604</td><td>5.356953</td></tr>
<tr><td>cases</td><td>0.018039</td><td>0.381587</td><td>0.001961</td><td>9.200093</td></tr>
<tr><td>vars</td><td>0.244790</td><td>0.224390</td><td>0.036055</td><td>6.789335</td></tr>
<tr><td>c_v</td><td>0.001197</td><td>0.584174</td><td>0.000094</td><td>12.738410</td></tr>
</tbody>
</table>
</div>
<div class='section'><h2>Multiple regression of cases,vars,c_v on time_pairwise</h2><a name='toc_3'></a>
<p>Engine: Statsample::Regression::Multiple::RubyEngine</p>
<p>Cases(listwise)=63(63)</p>
<p>R=0.999637</p>
<p>R^2=0.999275</p>
<p>R^2 Adj=0.999238</p>
<p>Std.Error R=0.538365</p>
<p>Equation=-0.520303 + -0.000708cases + 1.234451vars + 0.000735c_v</p>
<div class='section'><h3>ANOVA</h3><a name='toc_4'></a>
<a name='table_3'></a><table><caption>ANOVA Table</caption><thead><th>source</th><th>ss</th><th>df</th><th>ms</th><th>f</th><th>p</th></thead>
<tbody>
<tr><td>Regression</td><td>23554.271</td><td>3</td><td>7851.424</td><td>27089.134</td><td>0.000</td></tr>
<tr><td>Error</td><td>17.100</td><td>59</td><td>0.290</td><td></td><td></td></tr>
<tr><td>Total</td><td>23571.372</td><td>62</td><td>7851.714</td><td></td><td></td></tr>
</tbody>
</table>
</div>
<a name='table_4'></a><table><caption>Beta coefficients</caption><thead><th>coeff</th><th>b</th><th>beta</th><th>se</th><th>t</th></thead>
<tbody>
<tr><td>Constant</td><td>-0.520303</td><td>-</td><td>0.131039</td><td>-3.970594</td></tr>
<tr><td>cases</td><td>-0.000708</td><td>-0.011324</td><td>0.000341</td><td>-2.074007</td></tr>
<tr><td>vars</td><td>1.234451</td><td>0.855546</td><td>0.006278</td><td>196.641087</td></tr>
<tr><td>c_v</td><td>0.000735</td><td>0.271138</td><td>0.000016</td><td>44.912972</td></tr>
</tbody>
</table>
</div>
</body></html>
================================================
FILE: benchmarks/correlation_matrix_methods/correlation_matrix.rb
================================================
# This test creates a database to adjust the best algorithm
# to use on correlation matrix construction: it times both algorithms
# over a grid of dataset sizes, then regresses time on cases/vars.
require(File.expand_path(File.dirname(__FILE__)+'/../helpers_benchmark.rb'))
require 'statsample'
require 'benchmark'

# Build a dataset of +vars+ standard-normal vectors, +cases+ cases each.
def create_dataset(vars, cases)
  ran = Distribution::Normal.rng
  vars.times.inject({}) { |ac, v|
    ac["x#{v}"] = Statsample::Vector.new_scale(cases) { ran.call }
    ac
  }.to_dataset
end

# Predicted (scaled) running time for the pairwise algorithm.
def prediction_pairwise(vars, cases)
  Statsample::Bivariate.prediction_pairwise(vars, cases) / 10
end

# Predicted (scaled) running time for the matrix-algebra algorithm.
def prediction_optimized(vars, cases)
  Statsample::Bivariate.prediction_optimized(vars, cases) / 10
end

# Rebuild the timing dataset when it is missing or older than this script.
# File.exist? replaces File.exists?, which is deprecated (removed in
# modern Ruby).
if !File.exist?("correlation_matrix.ds") or File.mtime(__FILE__) > File.mtime("correlation_matrix.ds")
  reps = 100 # number of repetitions
  ds_sizes = [5,10,30,50,100,150,200,500,1000]
  ds_vars = [3,4,5,10,20,30,40]
  #ds_sizes=[5,10]
  #ds_vars=[3,5,20]
  rs = Statsample::Dataset.new(%w{cases vars time_optimized time_pairwise})
  ds_sizes.each do |cases|
    ds_vars.each do |vars|
      ds = create_dataset(vars, cases)
      time_optimized = Benchmark.realtime do
        reps.times {
          Statsample::Bivariate.correlation_matrix_optimized(ds)
          ds.clear_gsl
        }
      end
      time_pairwise = Benchmark.realtime do
        reps.times {
          Statsample::Bivariate.correlation_matrix_pairwise(ds)
        }
      end
      puts "Cases:#{cases}, vars:#{vars} -> opt:%0.3f (%0.3f) | pair: %0.3f (%0.3f)" % [time_optimized, prediction_optimized(vars,cases), time_pairwise, prediction_pairwise(vars,cases)]
      # Times are stored as sqrt(milliseconds).
      # NOTE(review): presumably to linearize growth for the later
      # regression — confirm.
      rs.add_case({'cases'=>cases,'vars'=>vars,'time_optimized'=>Math.sqrt(time_optimized*1000),'time_pairwise'=>Math.sqrt(time_pairwise*1000)})
    end
  end
else
  rs = Statsample.load("correlation_matrix.ds")
end

rs.fields.each {|f| rs[f].type = :scale}
# Interaction term: cases * vars.
rs['c_v'] = rs.collect {|row| row['cases']*row['vars']}
rs.update_valid_data
rs.save("correlation_matrix.ds")
Statsample::Excel.write(rs, "correlation_matrix.xls")

# Regress each algorithm's time on cases, vars and their interaction.
rb = ReportBuilder.new(:name=>"Correlation matrix analysis")
rb.add(Statsample::Regression.multiple(rs[['cases','vars','time_optimized','c_v']],'time_optimized', :digits=>6))
rb.add(Statsample::Regression.multiple(rs[['cases','vars','time_pairwise','c_v']],'time_pairwise', :digits=>6))
rb.save_html("correlation_matrix.html")
================================================
FILE: benchmarks/factor_map.rb
================================================
# Benchmark: Velicer's MAP test timing with and without GSL-backed
# matrix algebra.
require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))
extend BenchPress

name "Statsample::Factor::Map with and without GSL"
author 'Clbustos'
date '2011-01-18'
summary "Velicer's MAP uses a lot of Matrix algebra. How much we can improve the timing using GSL?
"
reps 20 #number of repetitions

# 8x8 correlation matrix used as input to the MAP test.
corr = Matrix[
  [1,     0.846, 0.805, 0.859, 0.473, 0.398, 0.301, 0.382],
  [0.846, 1,     0.881, 0.826, 0.376, 0.326, 0.277, 0.415],
  [0.805, 0.881, 1,     0.801, 0.38,  0.319, 0.237, 0.345],
  [0.859, 0.826, 0.801, 1,     0.436, 0.329, 0.327, 0.365],
  [0.473, 0.376, 0.38,  0.436, 1,     0.762, 0.73,  0.629],
  [0.398, 0.326, 0.319, 0.329, 0.762, 1,     0.583, 0.577],
  [0.301, 0.277, 0.237, 0.327, 0.73,  0.583, 1,     0.539],
  [0.382, 0.415, 0.345, 0.365, 0.629, 0.577, 0.539, 1]
]

map_test = Statsample::Factor::MAP.new(corr)

measure "Statsample::Factor::MAP without GSL" do
  map_test.use_gsl = false
  map_test.compute
end

measure "Statsample::Factor::MAP with GSL" do
  map_test.use_gsl = true
  map_test.compute
end
================================================
FILE: benchmarks/helpers_benchmark.rb
================================================
$:.unshift(File.expand_path(File.dirname(__FILE__)+'/../lib/'))
$:.unshift(File.expand_path(File.dirname(__FILE__)+'/'))
require 'statsample'
require 'bench_press'
================================================
FILE: doc_latex/manual/equations.tex
================================================
\part{Equations}
\section{Convention}
\begin{align*}
n &= \text{sample size}\\
N &= \text{population size}\\
p &= \text{proportion inside a sample}\\
P &= \text{proportion inside a population}
\end{align*}
\section{Ruby::Regression::Multiple}
To compute the standard error of coefficients, you obtain the estimated variance-covariance matrix of error.
Let $\mathbf{X}$ be the matrix of predictor data, including a constant column; $MSE$ the mean square error; $SSE$ the sum of squares of errors; $n$ the number of cases; and $p$ the number of predictors.
\begin{equation}
\mathbf{MSE}=\frac{SSE}{n-p-1}
\end{equation}
\begin{equation}
\mathbf{E}=(\mathbf{X'}\mathbf{X})^{-1}\mathbf{MSE}
\end{equation}
The root squares of diagonal should be standard errors
\section{Ruby::SRS}
Finite Population correction is used in standard error calculation for populations below 10,000. Function
\begin{verbatim}
fpc_var(sam,pop)
\end{verbatim}
calculate FPC for variance with
\begin{equation}
fpc_{var} = \frac{N-n} {N-1}
\end{equation}
with n as sam and N as pop
Function
\begin{verbatim}
fpc = fpc(sam,pop)
\end{verbatim}
calculate FPC for standard deviation with
\begin{equation}
fpc_{sd} = \sqrt{\frac{N-n} {N-1}}
\label{fpc}
\end{equation}
with n as sample size and N as population size.
\subsection{Sample Size estimation for proportions}
On infinite populations, you should use method
\begin{verbatim}
estimation_n0(d,prop,margin=0.95)
\end{verbatim}
which uses
\begin{equation}
n = \frac{t^2(pq)}{d^2}
\label{n_i}
\end{equation}
where
\begin{align*}
t &= \text{t value for given level of confidence ( 1.96 for 95\% )}\\
d &= \text{margin of error}
\end{align*}
On finite populations, you should use
\begin{verbatim}
estimation_n(d,prop,n_pobl, margin=0.95)
\end{verbatim}
which uses
\begin{equation}
n = \frac{n_i}{1+(\frac{n_i-1}{N})}
\end{equation}
Where $n_i$ is n on \ref{n_i} and N is population size
================================================
FILE: examples/boxplot.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')
require 'statsample'

# Boxplot of three normally-distributed vectors; a low outlier (2) is
# appended to the first one.
Statsample::Analysis.store(Statsample::Graph::Boxplot) do
  n = 30
  first  = rnorm(n - 1, 50, 10)
  second = rnorm(n, 30, 5)
  third  = rnorm(n, 5, 1)
  first.push(2) # outlier
  boxplot(:vectors => [first, second, third],
          :width => 300, :height => 300,
          :groups => %w{first first second}, :minimum => 0)
end

Statsample::Analysis.run if __FILE__ == $0
================================================
FILE: examples/correlation_matrix.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')
require 'statsample'

# Correlation matrix of four independent standard-normal variables.
Statsample::Analysis.store("Statsample::Bivariate.correlation_matrix") do
  samples = 1000
  vectors = %w{a b c d}.each_with_object({}) { |key, h| h[key] = rnorm(samples) }
  ds = data_frame(vectors)
  summary(cor(ds))
end

Statsample::Analysis.run_batch if __FILE__ == $0
================================================
FILE: examples/dataset.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')
require 'statsample'

# Summary of a dataset with two vectors of random 0..3 values, where
# roughly one fifth of the entries are missing (nil).
Statsample::Analysis.store(Statsample::Dataset) do
  samples = 1000
  missing_uniform = lambda { r = rand(5); r == 4 ? nil : r }
  a = Statsample::Vector.new_scale(samples, &missing_uniform)
  b = Statsample::Vector.new_scale(samples, &missing_uniform)
  ds = { 'a' => a, 'b' => b }.to_dataset
  summary(ds)
end

Statsample::Analysis.run_batch if __FILE__ == $0
================================================
FILE: examples/dominance_analysis.rb
================================================
#!/usr/bin/ruby
# Example: dominance analysis — relative importance of four predictors
# on a criterion y built as a known linear combination of them.
$:.unshift(File.dirname(__FILE__)+'/../lib/')
require 'statsample'
Statsample::Analysis.store(Statsample::DominanceAnalysis) do
sample=300
# Four independent standard-normal predictors; note the third is stored
# under the field name 'cc'.
a=rnorm(sample)
b=rnorm(sample)
c=rnorm(sample)
d=rnorm(sample)
ds={'a'=>a,'b'=>b,'cc'=>c,'d'=>d}.to_dataset
# attach() makes each dataset field callable by name inside this block.
attach(ds)
# True model: y = 5a + 3b + 2cc + d + e, with e ~ N(0,1).
ds['y']=a*5+b*3+cc*2+d+rnorm(300)
cm=cor(ds)
summary(cm)
# OLS regression of y on all predictors, for comparison.
lr=lr(ds,'y')
summary(lr)
# Dominance analysis with each predictor considered separately.
da=dominance_analysis(ds,'y')
summary(da)
# Repeated, treating {cc, d} as a single group of predictors.
da=dominance_analysis(ds,'y',:name=>"Dominance Analysis using group of predictors", :predictors=>['a', 'b', %w{cc d}])
summary(da)
end
if __FILE__==$0
Statsample::Analysis.run_batch
end
================================================
FILE: examples/dominance_analysis_bootstrap.rb
================================================
#!/usr/bin/ruby
# Example: bootstrap dominance analysis — estimating the stability of
# dominance relations between predictors across resamples.
$:.unshift(File.dirname(__FILE__)+'/../lib/')
require 'statsample'
Statsample::Analysis.store(Statsample::DominanceAnalysis::Bootstrap) do
sample=300
# Four independent standard-normal predictors.
a=rnorm(sample)
b=rnorm(sample)
c=rnorm(sample)
d=rnorm(sample)
a.name="a"
b.name="b"
c.name="c"
d.name="d"
# Note: vector c is stored under the field name 'cc'.
ds={'a'=>a,'b'=>b,'cc'=>c,'d'=>d}.to_dataset
# attach() makes each dataset field callable by name inside this block.
attach(ds)
# Two criteria: y1 depends on every predictor; y2 mostly on 'a'.
ds['y1']=a*5+b*2+cc*2+d*2+rnorm(sample,0,10)
ds['y2']=a*10+rnorm(sample)
# Multivariate bootstrap (two dependent variables), 100 resamples.
dab=dominance_analysis_bootstrap(ds, ['y1','y2'], :debug=>true)
dab.bootstrap(100,nil)
summary(dab)
# Univariate bootstrap on the field range 'a'..'y1' with y1 as criterion.
ds2=ds['a'..'y1']
dab2=dominance_analysis_bootstrap(ds2, 'y1', :debug=>true)
dab2.bootstrap(100,nil)
summary(dab2)
end
if __FILE__==$0
Statsample::Analysis.run_batch
end
================================================
FILE: examples/histogram.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')
require 'statsample'

# Histogram of 3000 draws from a normal distribution N(0, 20).
Statsample::Analysis.store(Statsample::Graph::Histogram) do
  data = rnorm(3000, 0, 20)
  histogram(data)
end

Statsample::Analysis.run if __FILE__ == $0
================================================
FILE: examples/icc.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')
require 'statsample'

# Intra-class correlation on four "raters": b, c and d are noisy copies
# of a, each value shifted by a random amount in -2..1.
Statsample::Analysis.store(Statsample::Reliability::ICC) do
  size = 1000
  a = Statsample::Vector.new_scale(size) { rand(10) }
  b = a.recode { |v| v + rand(4) - 2 }
  c = a.recode { |v| v + rand(4) - 2 }
  d = a.recode { |v| v + rand(4) - 2 }
  @ds  = { 'a' => a, 'b' => b, 'c' => c, 'd' => d }.to_dataset
  @icc = Statsample::Reliability::ICC.new(@ds)
  summary(@icc)
  # Same data summarized under two other ICC formulations.
  @icc.type = :icc_3_1
  summary(@icc)
  @icc.type = :icc_a_k
  summary(@icc)
end

Statsample::Analysis.run_batch if __FILE__ == $0
================================================
FILE: examples/levene.rb
================================================
#!/usr/bin/ruby
# Example: Levene's test for homogeneity of variances between two vectors.
$:.unshift(File.dirname(__FILE__)+'/../lib/')
require 'statsample'
Statsample::Analysis.store(Statsample::Test::Levene) do
# a contains the outlier 100, so its variance differs markedly from b's
a=[1,2,3,4,5,6,7,8,100,10].to_scale
b=[30,40,50,60,70,80,90,100,110,120].to_scale
summary(levene([a,b]))
end
if __FILE__==$0
Statsample::Analysis.run_batch
end
================================================
FILE: examples/multiple_regression.rb
================================================
#!/usr/bin/ruby
# Example: OLS multiple regression with known coefficients.
# y = 5a + 3b + 2cc + d + noise, so lr should recover roughly (5,3,2,1).
$:.unshift(File.dirname(__FILE__)+'/../lib/')
require 'statsample'
Statsample::Analysis.store(Statsample::Regression::Multiple) do
samples=2000
ds=dataset('a'=>rnorm(samples),'b'=>rnorm(samples),'cc'=>rnorm(samples),'d'=>rnorm(samples))
# attach() exposes the dataset fields as methods (a, b, cc, d)
attach(ds)
ds['y']=a*5+b*3+cc*2+d+rnorm(samples)
summary lr(ds,'y')
end
if __FILE__==$0
Statsample::Analysis.run_batch
end
================================================
FILE: examples/multivariate_correlation.rb
================================================
#!/usr/bin/ruby
# Example: regression with multiple dependent variables, computed from a
# correlation matrix: adhd, cd and odd regressed on the remaining fields.
$:.unshift(File.dirname(__FILE__)+'/../lib/')
require 'statsample'
# NOTE(review): 'mathn' globally changes Ruby integer arithmetic
# (e.g. Integer#/ returns rationals) — presumably required for the
# matrix computations here; confirm before removing.
require 'mathn'
Statsample::Analysis.store(Statsample::Regression::Multiple::MultipleDependent) do
# 9x9 correlation matrix for the variables named in fields= below
complete=Matrix[
[1,0.53,0.62,0.19,-0.09,0.08,0.02,-0.12,0.08],
[0.53,1,0.61,0.23,0.1,0.18,0.02,-0.1,0.15],
[0.62,0.61,1,0.03,0.1,0.12,0.03,-0.06,0.12],
[0.19,0.23,0.03,1,-0.02,0.02,0,-0.02,-0.02],
[-0.09,0.1,0.1,-0.02,1,0.05,0.06,0.18,0.02],
[0.08,0.18,0.12,0.02,0.05,1,0.22,-0.07,0.36],
[0.02,0.02,0.03,0,0.06,0.22,1,-0.01,-0.05],
[-0.12,-0.1,-0.06,-0.02,0.18,-0.07,-0.01,1,-0.03],
[0.08,0.15,0.12,-0.02,0.02,0.36,-0.05,-0.03,1]]
complete.extend Statsample::CovariateMatrix
complete.fields=%w{adhd cd odd sex age monly mwork mage poverty}
lr=Statsample::Regression::Multiple::MultipleDependent.new(complete, %w{adhd cd odd})
echo "R^2_yx #{lr.r2yx}"
echo "P^2_yx #{lr.p2yx}"
end
if __FILE__==$0
Statsample::Analysis.run_batch
end
================================================
FILE: examples/parallel_analysis.rb
================================================
#!/usr/bin/ruby
# Example: Horn's parallel analysis vs the Kaiser criterion.
# 30 observed variables are mixtures of 3 latent factors, so parallel
# analysis should retain ~3 factors while Kaiser usually overextracts.
$:.unshift(File.dirname(__FILE__)+'/../lib/')
require 'statsample'
samples=150
variables=30
iterations=50
# samples/variables/iterations are captured from the enclosing scope
Statsample::Analysis.store(Statsample::Factor::ParallelAnalysis) do
rng = Distribution::Normal.rng()
f1=rnorm(samples)
f2=rnorm(samples)
f3=rnorm(samples)
vectors={}
variables.times do |i|
# each observed variable mixes the three factors with i-dependent weights
vectors["v#{i}"]=samples.times.collect {|nv| f1[nv]*i+(f2[nv]*(15-i))+((f3[nv]*(30-i))*1.5)*rng.call}.to_scale
vectors["v#{i}"].name="Vector #{i}"
end
ds=vectors.to_dataset
pa=Statsample::Factor::ParallelAnalysis.new(ds, :iterations=>iterations, :debug=>true)
pca=pca(cor(ds))
echo "There are 3 real factors on data"
summary pca
echo "Traditional Kaiser criterion (k>1) returns #{pca.m} factors"
summary pa
echo "Parallel Analysis returns #{pa.number_of_factors} factors to preserve"
end
if __FILE__==$0
Statsample::Analysis.run_batch
end
================================================
FILE: examples/polychoric.rb
================================================
#!/usr/bin/ruby
# Example: polychoric correlation from a random 3x3 contingency table,
# estimated with the two-step, joint and polychoric-series methods.
$:.unshift(File.dirname(__FILE__)+'/../lib/')
# NOTE(review): machine-specific development path left in the example;
# harmless when absent, but could be removed.
$:.unshift("/home/cdx/usr/lib/statsample-bivariate-extension/lib/")
require 'statsample'
Statsample::Analysis.store(Statsample::Bivariate::Polychoric) do
ct=Matrix[[rand(10)+50, rand(10)+50, rand(10)+1],
[rand(20)+5, rand(50)+4, rand(10)+1],
[rand(8)+1, rand(12)+1, rand(10)+1]]
# Estimation of polychoric correlation using two-step (default)
poly=polychoric(ct, :name=>"Polychoric with two-step", :debug=>false)
summary poly
# Estimation of polychoric correlation using joint method (slow)
poly=polychoric(ct, :method=>:joint, :name=>"Polychoric with joint")
summary poly
# Uses polychoric series (not recomended)
poly=polychoric(ct, :method=>:polychoric_series, :name=>"Polychoric with polychoric series")
summary poly
end
if __FILE__==$0
Statsample::Analysis.run_batch
end
================================================
FILE: examples/principal_axis.rb
================================================
#!/usr/bin/ruby
# Example: principal axis factoring on a 4x4 correlation matrix,
# extracting one factor (:m=>1) without SMC-based communality priors.
$:.unshift(File.dirname(__FILE__)+'/../lib/')
require 'statsample'
Statsample::Analysis.store(Statsample::Factor::PrincipalAxis) do
matrix=Matrix[
[1.0, 0.709501601093587, 0.877596585880047, 0.272219316266807], [0.709501601093587, 1.0, 0.291633797330304, 0.871141831433844], [0.877596585880047, 0.291633797330304, 1.0, -0.213373722977167], [0.272219316266807, 0.871141831433844, -0.213373722977167, 1.0]]
matrix.extend Statsample::CovariateMatrix
#matrix.fields=%w{a b c d}
fa=principal_axis(matrix,:m=>1,:smc=>false)
summary fa
end
if __FILE__==$0
Statsample::Analysis.run_batch
end
================================================
FILE: examples/reliability.rb
================================================
#!/usr/bin/ruby
# Example: scale reliability analysis on 20 parallel items
# (a common factor plus small item-specific noise), followed by a
# multi-scale analysis with two sub-scales.
$:.unshift(File.dirname(__FILE__)+'/../lib')
require 'statsample'
Statsample::Analysis.store(Statsample::Reliability) do
samples=100
a=rnorm(samples)
ds=Statsample::Dataset.new
20.times do |i|
ds["v#{i}"]=a+rnorm(samples,0,0.2)
end
ds.update_valid_data
rel=Statsample::Reliability::ScaleAnalysis.new(ds)
summary rel
# NOTE(review): items are v0..v19, so v0 belongs to neither sub-scale —
# presumably intentional for the demo; confirm.
ms=Statsample::Reliability::MultiScaleAnalysis.new(:name=>"Multi Scale analyss") do |m|
m.scale "Scale 1", ds.clone(%w{v1 v2 v3 v4 v5 v6 v7 v8 v9 v10})
m.scale "Scale 2", ds.clone(%w{v11 v12 v13 v14 v15 v16 v17 v18 v19})
end
summary ms
end
if __FILE__==$0
Statsample::Analysis.run_batch
end
================================================
FILE: examples/scatterplot.rb
================================================
#!/usr/bin/ruby
# Example: scatterplot of y against x, where y = x plus N(0.5, 0.2) noise.
$:.unshift(File.dirname(__FILE__)+'/../lib/')
# NOTE(review): machine-specific development path; harmless when absent.
$:.unshift('/home/cdx/dev/reportbuilder/lib/')
require 'benchmark'
require 'statsample'
n=100
Statsample::Analysis.store(Statsample::Graph::Scatterplot) do
# n is captured from the enclosing top-level scope by this block
x=rnorm(n)
y=x+rnorm(n,0.5,0.2)
scatterplot(x,y)
end
if __FILE__==$0
Statsample::Analysis.run
end
================================================
FILE: examples/t_test.rb
================================================
#!/usr/bin/ruby
# Example: one-sample and independent two-samples t tests.
$:.unshift(File.dirname(__FILE__)+'/../lib')
require 'statsample'
Statsample::Analysis.store(Statsample::Test::T) do
a=rnorm(10)
# NOTE(review): a is drawn from N(0,1), so testing against u=50 should
# reject strongly — presumably a deliberate demonstration; confirm.
t_1=Statsample::Test.t_one_sample(a,{:u=>50})
summary t_1
b=rnorm(10,2)
t_2=Statsample::Test.t_two_samples_independent(a,b)
summary t_2
end
if __FILE__==$0
Statsample::Analysis.run_batch
end
================================================
FILE: examples/tetrachoric.rb
================================================
#!/usr/bin/ruby
# Example: tetrachoric correlation from the four cell counts (a,b,c,d)
# of a 2x2 contingency table.
$:.unshift(File.dirname(__FILE__)+'/../lib/')
require 'statsample'
Statsample::Analysis.store(Statsample::Bivariate::Tetrachoric) do
a=40
b=10
c=20
d=30
summary tetrachoric(a,b,c,d)
end
if __FILE__==$0
Statsample::Analysis.run_batch
end
================================================
FILE: examples/u_test.rb
================================================
#!/usr/bin/ruby
# Example: Mann-Whitney U test on two samples with different
# sizes and clearly different distributions.
$:.unshift(File.dirname(__FILE__)+'/../lib')
require 'statsample'
Statsample::Analysis.store(Statsample::Test::UMannWhitney) do
a=10.times.map {rand(100)}.to_scale
b=20.times.map {(rand(20))**2+50}.to_scale
u=Statsample::Test::UMannWhitney.new(a,b)
summary u
end
if __FILE__==$0
Statsample::Analysis.run_batch
end
================================================
FILE: examples/vector.rb
================================================
#!/usr/bin/ruby
# Example: vector summaries, including missing data.
$:.unshift(File.dirname(__FILE__)+'/../lib/')
require 'statsample'
Statsample::Analysis.store(Statsample::Vector) do
# roughly one in five values is nil (r==4), the rest are in 0..3
a=Statsample::Vector.new_scale(1000) {r=rand(5); r==4 ? nil: r;}
summary a
# c() builds a vector from scalars and ranges, R-style
b=c(1,2,3,4,6..10)
summary b
end
if __FILE__==$0
Statsample::Analysis.run_batch
end
================================================
FILE: examples/velicer_map_test.rb
================================================
#!/usr/bin/ruby
# Example: Velicer's Minimum Average Partial (MAP) test vs Kaiser
# criterion. 10 observed variables are mixtures of 2 latent factors.
$:.unshift(File.dirname(__FILE__)+'/../lib/')
require 'statsample'
Statsample::Analysis.store(Statsample::Factor::MAP) do
rng=Distribution::Normal.rng
samples=100
variables=10
f1=rnorm(samples)
f2=rnorm(samples)
vectors={}
variables.times do |i|
# first five variables load mainly on f1, the remaining five on f2
vectors["v#{i}"]=samples.times.collect {|nv|
if i<5
f1[nv]*5 + f2[nv] *2 +rng.call
else
f1[nv]*2 + f2[nv] *3 +rng.call
end
}.to_scale
end
ds=vectors.to_dataset
# note: this local 'cor' shadows the cor() shorthand from here on
cor=cor(ds)
pca=pca(cor)
map=Statsample::Factor::MAP.new(cor)
echo ("There are 2 real factors on data")
summary(pca)
echo("Traditional Kaiser criterion (k>1) returns #{pca.m} factors")
summary(map)
echo("Velicer's MAP Test returns #{map.number_of_factors} factors to preserve")
end
if __FILE__==$0
Statsample::Analysis.run_batch
end
================================================
FILE: grab_references.rb
================================================
#!/usr/bin/env ruby1.9
# Scans every .rb file under the current tree for "== Reference" sections
# and collects the bulleted references that follow each one, writing the
# unique, sorted list to references.txt via ReportBuilder.
require 'reportbuilder'
refs=[]
Dir.glob "**/*.rb" do |f|
  next if f=~/pkg/ # skip packaged copies
  reference=false
  # Fix: File.open(f).each_line leaked one open file handle per scanned
  # file (the handle was never closed); File.foreach closes it for us.
  File.foreach(f) do |l|
    if l=~/== Reference/
      reference=true
    elsif reference
      if l=~/\*\s+(.+)/
        refs.push $1
      else
        # a non-bullet line ends the reference section
        reference=false
      end
    end
  end
end
rb=ReportBuilder.new(:name=>"References") do |g|
  refs.uniq.sort.each do |r|
    g.text "* #{r}"
  end
end
rb.save_text("references.txt")
================================================
FILE: lib/spss.rb
================================================
# = spss.rb -
#
# Provides utilites for working with spss files
#
# Copyright (C) 2009 Claudio Bustos
#
# Claudio Bustos mailto:clbustos@gmail.com
module SPSS # :nodoc: all
module Dictionary
# Base class for elements of an SPSS dictionary.
# Stores child elements and lets subclasses initialize attributes
# from a configuration hash.
class Element
  # Appends a child element.
  def add(a)
    @elements.push(a)
  end
  # Renders every child element using +func+ (:to_s by default),
  # one per line, each prefixed with a space.
  def parse_elements(func=:to_s)
    @elements.collect{|e| " "+e.send(func)}.join("\n")
  end
  # Assigns each key of +config+ through its writer method, when one exists.
  # Fix: Object#methods returns Symbols on Ruby >= 1.9, so the original
  # check (methods.include? key.to_s) never matched and configuration was
  # silently ignored; test the writer with respond_to? instead.
  def init_with config
    config.each {|key,value|
      self.send(key.to_s+"=",value) if respond_to?(key.to_s+"=")
    }
  end
  # config:: hash kept for subclasses; the child list starts empty.
  def initialize(config={})
    @config=config
    @elements=[]
  end
end
# Root element of an SPSS dictionary: holds variables and renders
# the whole dictionary as XML or as SPSS syntax.
class Dictionary < Element
attr_accessor :locale, :date_time, :row_count
def initialize(config={})
super
# defaults first, then the user-provided config overrides them
init_with ({
:locale=>"en_US",
:date_time=>Time.new().strftime("%Y-%m-%dT%H:%M:%S"),
:row_count=>1
})
init_with config
end
# XML document containing every child element serialized via to_xml
def to_xml
"<dictionary locale='#{@locale}' creationDateTime='#{@date_time}' rowCount='#{@row_count}' xmlns='http://xml.spss.com/spss/data'>\n"+parse_elements(:to_xml)+"\n</dictionary>"
end
# SPSS syntax: concatenation of every child's to_spss output
def to_spss
parse_elements(:to_spss)
end
end
# A single missing-value declaration for a variable.
# +type+ may be nil (discrete value), "lowerBound" or "upperBound".
class MissingValue < Element
  attr_accessor :data, :type, :from, :to
  # data:: value flagged as missing
  # type:: nil, "lowerBound" or "upperBound"; anything else raises
  def initialize(data,type=nil)
    @data=data
    unless type.nil? || type=="lowerBound" || type=="upperBound"
      raise Exception,"Incorrect value for type"
    end
    @type=type
  end
  # XML fragment; the type attribute is emitted only when type is set
  def to_xml
    xml="<missingValue data='#{@data}' "
    xml << "type='#{type}'" unless type.nil?
    xml+"/>"
  end
end
# A set of value labels (value => label text) attached to a variable.
class LabelSet
  # NOTE(review): argument-less attr_accessor is a no-op; kept as-is.
  attr_accessor
  def initialize(labels)
    @labels=labels
  end
  # XML representation, naming the variable the set belongs to.
  def parse_xml(name)
    rows=@labels.collect do |key,value|
      "<valueLabel label='#{key}' value='#{value}' />"
    end
    "<valueLabelSet>\n "+rows.join("\n ")+"\n <valueLabelVariable name='#{name}' />\n</valueLabelSet>"
  end
  # SPSS syntax fragment: one "value 'label'" pair per line.
  def parse_spss()
    pairs=@labels.collect {|key,value| "#{key} '#{value}'"}
    pairs.join("\n ")
  end
end
# A variable declaration in the dictionary. A class-level counter gives
# each new instance default name/label ("var1"/"Variable 1", ...).
class Variable < Element
attr_accessor :aligment, :display_width, :label, :measurement_level, :name, :type, :decimals, :width, :type_format, :labelset, :missing_values
def initialize(config={})
super
# counter shared across all Variable instances (never reset)
@@var_number||=1
init_with({
:aligment => "left",
:display_width => 8,
:label => "Variable #{@@var_number}",
:measurement_level => "SCALE",
:name => "var#{@@var_number}",
:type => 0,
:decimals => 2,
:width => 10,
:type_format => "F",
:labelset => nil
})
init_with config
@missing_values=[]
@@var_number+=1
end
# XML: the variable element plus its format, missing values and label set
def to_xml
labelset_s=(@labelset.nil?) ? "":"\n"+@labelset.parse_xml(@name)
missing_values=(@missing_values.size>0) ? @missing_values.collect {|m| m.to_xml}.join("\n"):""
"<variable aligment='#{@aligment}' displayWidth='#{@display_width}' label='#{@label}' measurementLevel='#{@measurement_level}' name='#{@name}' type='#{@type}'>\n<variableFormat decimals='#{@decimals}' width='#{@width}' type='#{@type_format}' />\n"+parse_elements(:to_xml)+missing_values+"</variable>"+labelset_s
end
# SPSS syntax for labels, alignment, width, level, value labels and
# missing values. ("ALIGMENT" matches the spelling used by this class.)
def to_spss
out=<<HERE
VARIABLE LABELS #{@name} '#{label}' .
VARIABLE ALIGMENT #{@name} (#{@aligment.upcase}) .
VARIABLE WIDTH #{@name} (#{@display_width}) .
VARIABLE LEVEL #{@name} (#{@measurement_level.upcase}) .
HERE
if !@labelset.nil?
out << "VALUE LABELS #{@name} "+labelset.parse_spss()+" ."
end
if @missing_values.size>0
out << "MISSING VALUES #{@name} ("+@missing_values.collect{|m| m.data}.join(",")+") ."
end
out
end
end
end
end
# Demonstration: build a dictionary with two variables, a value-label
# set and a missing value, and write the SPSS syntax to disk.
# NOTE(review): this runs at require-time and writes dic_spss.sps to the
# current directory — consider guarding it with __FILE__==$0.
n=SPSS::Dictionary::Dictionary.new
ls=SPSS::Dictionary::LabelSet.new({1=>"Si",2=>"No"})
var1=SPSS::Dictionary::Variable.new
var1.labelset=ls
mv1=SPSS::Dictionary::MissingValue.new("-99")
var2=SPSS::Dictionary::Variable.new
n.add(var1)
n.add(var2)
var2.missing_values=[mv1]
File.open("dic_spss.sps","wb") {|f|
f.puts n.to_spss
}
================================================
FILE: lib/statsample/analysis/suite.rb
================================================
module Statsample
  module Analysis
    # An analysis suite: wraps a block of Statsample shorthand calls with
    # an R-like workspace (attach/detach of datasets) and console output.
    class Suite
      include Statsample::Shorthand
      attr_accessor :output
      attr_accessor :name
      attr_reader :block
      # opts:: Hash with :name and :output, or a bare name.
      def initialize(opts=Hash.new(), &block)
        if !opts.is_a? Hash
          opts={:name=>opts}
        end
        @block=block
        @name=opts[:name] || "Analysis #{Time.now}"
        @attached=[]
        @output=opts[:output] || ::STDOUT
      end
      # Run the analysis, putting output on @output.
      # Blocks without parameters are instance_eval'ed so the
      # shorthand DSL is available; others receive the suite.
      def run
        @block.arity<1 ? instance_eval(&@block) : @block.call(self)
      end
      # Provides a description of the procedure. Only appears as a commentary on
      # SuiteReportBuilder outputs
      def desc(d)
        @output.puts("Description:")
        @output.puts("  #{d}")
      end
      # Print arguments on the output stream.
      def echo(*args)
        @output.puts(*args)
      end
      # Print the text summary of +obj+.
      def summary(obj)
        obj.summary
      end
      # Re-run this suite's block against a ReportBuilder container.
      def add_to_reportbuilder(rb)
        SuiteReportBuilder.new({:name=>name, :rb=>rb}, &block)
      end
      # Generate a report file for this suite's block.
      def generate(filename)
        ar=SuiteReportBuilder.new({:name=>name}, &block)
        ar.generate(filename)
      end
      # Render this suite's block as report text.
      def to_text
        ar=SuiteReportBuilder.new({:name=>name}, &block)
        ar.to_text
      end
      # Attach a dataset: its fields become callable methods
      # (see method_missing).
      def attach(ds)
        @attached.push(ds)
      end
      # Detach +ds+, or the most recently attached dataset when nil.
      def detach(ds=nil)
        if ds.nil?
          @attached.pop
        else
          @attached.delete(ds)
        end
      end
      # Keep the Shorthand graph builders reachable: the overrides below
      # wrap them to display the generated SVG.
      alias :old_boxplot :boxplot
      alias :old_histogram :histogram
      alias :old_scatterplot :scatterplot
      # Write +svg+ to a temp file and open it with the system viewer.
      def show_svg(svg)
        require 'tmpdir'
        fn=Dir.tmpdir+"/image_#{Time.now.to_f}.svg"
        File.open(fn,"w") {|fp| fp.write svg}
        if RUBY_PLATFORM =~/darwin/
          %x(open -a safari #{fn})
        else
          %x(xdg-open #{fn})
        end
      end
      def boxplot(*args)
        show_svg(old_boxplot(*args).to_svg)
      end
      def histogram(*args)
        show_svg(old_histogram(*args).to_svg)
      end
      def scatterplot(*args)
        show_svg(old_scatterplot(*args).to_svg)
      end
      # Unknown names are resolved against attached datasets, most
      # recently attached first, returning the matching field vector.
      def method_missing(name, *args, &block)
        @attached.reverse.each do |ds|
          return ds[name.to_s] if ds.fields.include?(name.to_s)
        end
        # Fix: raise the conventional NoMethodError (still a StandardError,
        # so existing bare rescues keep working) instead of RuntimeError.
        raise NoMethodError, "Method #{name} doesn't exist"
      end
      # Keep respond_to? consistent with method_missing.
      def respond_to_missing?(name, include_all=false)
        @attached.reverse.each do |ds|
          return true if ds.fields.include?(name.to_s)
        end
        super
      end
    end
  end
end
================================================
FILE: lib/statsample/analysis/suitereportbuilder.rb
================================================
module Statsample
  module Analysis
    # A Suite that routes echo/summary/graph calls into a ReportBuilder
    # object instead of printing or displaying them directly.
    class SuiteReportBuilder < Suite
      attr_accessor :rb
      # opts:: Hash with :name and optionally :rb (an existing
      # ReportBuilder container), or a bare name.
      def initialize(opts=Hash.new,&block)
        if !opts.is_a? Hash
          opts={:name=>opts}
        end
        super(opts,&block)
        @rb=opts[:rb] || ReportBuilder.new(:name=>name)
      end
      # Run the stored block and save the report to +filename+.
      def generate(filename)
        run if @block
        @rb.save(filename)
      end
      # Run the stored block and return the report as text.
      def to_text
        run if @block
        @rb.to_text
      end
      # Add the object itself to the report (ReportBuilder calls its
      # report_building method), instead of its text summary.
      def summary(o)
        @rb.add(o)
      end
      def desc(d)
        @rb.add(d)
      end
      def echo(*args)
        args.each do |a|
          @rb.add(a)
        end
      end
      def boxplot(*args)
        @rb.add(old_boxplot(*args))
      end
      def histogram(*args)
        @rb.add(old_histogram(*args))
      end
      # Fix: this was a second, duplicate definition of boxplot, which
      # silently overwrote the one above and left scatterplot falling back
      # to Suite's SVG-viewer version (opening a browser during report
      # generation). Route scatterplot into the report like the other graphs.
      def scatterplot(*args)
        @rb.add(old_scatterplot(*args))
      end
    end
  end
end
================================================
FILE: lib/statsample/analysis.rb
================================================
require 'statsample/analysis/suite'
require 'statsample/analysis/suitereportbuilder'
module Statsample
# DSL to create analysis without hazzle.
# * Shortcuts methods to avoid use complete namescapes, many based on R
# * Attach/detach vectors to workspace, like R
# == Example
# an1=Statsample::Analysis.store(:first) do
# # Load excel file with x,y,z vectors
# ds=excel('data.xls')
# # See variables on ds dataset
# names(ds)
# # Attach the vectors to workspace, like R
# attach(ds)
# # vector 'x' is attached to workspace like a method,
# # so you can use like any variable
# mean,sd=x.mean, x.sd
# # Shameless R robbery
# a=c( 1:10)
# b=c(21:30)
# summary(cor(ds)) # Call summary method on correlation matrix
# end
# # You can run the analysis by its name
# Statsample::Analysis.run(:first)
# # or using the returned variables
# an1.run
# # You can also generate a report using ReportBuilder.
# # .summary() method call 'report_building' on the object,
# # instead of calling text summary
# an1.generate("report.html")
module Analysis
  # Registry of stored Suite objects, keyed by name.
  @@stored_analysis={}
  @@last_analysis=nil
  # Remove every stored analysis.
  def self.clear_analysis
    @@stored_analysis.clear
  end
  # Hash of name=>Suite for every stored analysis.
  def self.stored_analysis
    @@stored_analysis
  end
  # The most recently stored analysis.
  def self.last
    @@stored_analysis[@@last_analysis]
  end
  # Store a new analysis Suite under +name+; the block is the analysis body.
  def self.store(name, opts=Hash.new,&block)
    raise "You should provide a block" if !block
    @@last_analysis=name
    opts={:name=>name}.merge(opts)
    @@stored_analysis[name]=Suite.new(opts,&block)
  end
  # Run analysis +*args+
  # Without arguments, run all stored analysis
  # Only 'echo' will be returned to screen
  def self.run(*args)
    args=stored_analysis.keys if args.size==0
    raise "Analysis #{args} doesn't exist" if (args - stored_analysis.keys).size>0
    args.each do |name|
      stored_analysis[name].run
    end
  end
  # Add analysis +*args+ to an reportbuilder object.
  # Without arguments, add all stored analysis
  # Each analysis is wrapped inside a ReportBuilder::Section object
  # This is the method is used by save() and to_text()
  def self.add_to_reportbuilder(rb, *args)
    args=stored_analysis.keys if args.size==0
    # Fix: the message interpolated +name+, which at module level resolves
    # to Module#name ("Statsample::Analysis"), not the missing analyses.
    raise "Analysis #{args} doesn't exist" if (args - stored_analysis.keys).size>0
    args.each do |name|
      section=ReportBuilder::Section.new(:name=>stored_analysis[name].name)
      rb_an=stored_analysis[name].add_to_reportbuilder(section)
      rb.add(section)
      rb_an.run
    end
  end
  # Save the analysis on a file
  # Without arguments, add all stored analysis
  def self.save(filename, *args)
    rb=ReportBuilder.new(:name=>filename)
    add_to_reportbuilder(rb, *args)
    rb.save(filename)
  end
  # Run analysis and return as string
  # output of echo callings
  # Without arguments, add all stored analysis
  def self.to_text(*args)
    rb=ReportBuilder.new(:name=>"Analysis #{Time.now}")
    add_to_reportbuilder(rb, *args)
    rb.to_text
  end
  # Run analysis and return to screen all
  # echo and summary callings
  def self.run_batch(*args)
    puts to_text(*args)
  end
end
end
================================================
FILE: lib/statsample/anova/contrast.rb
================================================
module Statsample
module Anova
# A planned contrast between group means for a one-way ANOVA.
# Define the contrast either with explicit weights (:c) or with two
# index sets (:c1, :c2) whose weights are derived automatically.
class Contrast
attr_reader :psi
# Mean square within, taken from a one-way ANOVA over the vectors
attr_reader :msw
include Summarizable
# opts requires :vectors; optional :c, :c1/:c2, :t_options and :name.
def initialize(opts=Hash.new)
raise "Should set at least vectors options" if opts[:vectors].nil?
@vectors=opts[:vectors]
@c=opts[:c]
@c1,@c2=opts[:c1], opts[:c2]
@t_options=opts[:t_options] || {:estimate_name=>_("Psi estimate")}
@name=opts[:name] || _("Contrast")
@psi=nil
@anova=Statsample::Anova::OneWayWithVectors.new(@vectors)
@msw=@anova.msw
end
# Hypothesis contrast, selecting index for each constrast
# For example, if you want to contrast x_0 against x_1 and x_2
# you should use
# c.contrast([0],[1,2])
def c_by_index(c1,c2)
# weights: +1/|c1| for the first group set, -1/|c2| for the second
contrast=[0]*@vectors.size
c1.each {|i| contrast[i]=1.quo(c1.size)}
c2.each {|i| contrast[i]=-1.quo(c2.size)}
@c=contrast
c(contrast)
end
# Contrast estimate: sum of c_i * mean_i, computed lazily from
# :c or :c1/:c2. Returns nil when no contrast was defined.
def psi
if @psi.nil?
c(@c) if @c
c_by_index(@c1,@c2) if (@c1 and @c2)
end
@psi
end
def confidence_interval(cl=nil)
t_object.confidence_interval(cl)
end
# Hypothesis contrast, using custom values
# Every parameter is a contrast value. You should use
# the same number of contrast as vectors on class and the sum
# of constrast should be 0.
def c(args=nil)
return @c if args.nil?
@c=args
raise "contrast number!=vector number" if args.size!=@vectors.size
#raise "Sum should be 0" if args.inject(0) {|ac,v| ac+v}!=0
@psi=args.size.times.inject(0) {|ac,i| ac+(args[i]*@vectors[i].mean)}
end
# Standard error of psi: sqrt(MSW * sum(c_i^2 / n_i))
def standard_error
sum=@vectors.size.times.inject(0) {|ac,i|
ac+((@c[i].rationalize**2).quo(@vectors[i].size))
}
Math.sqrt(@msw*sum)
end
alias :se :standard_error
# Degrees of freedom: total cases minus number of groups
def df
@vectors.inject(0) {|ac,v| ac+v.size}-@vectors.size
end
# T test object built from psi, its standard error and df
def t_object
Statsample::Test::T.new(psi, se, df, @t_options)
end
def t
t_object.t
end
def probability
t_object.probability
end
def report_building(builder)
builder.section(:name=>@name) do |s|
s.text _("Contrast:%s") % c.join(",")
s.parse_element(t_object)
end
end
end
end
end
================================================
FILE: lib/statsample/anova/oneway.rb
================================================
module Statsample
module Anova
# = Generic Anova one-way.
# You could enter the sum of squares or the mean squares. You
# should enter the degrees of freedom for numerator and denominator.
# == Usage
# anova=Statsample::Anova::OneWay(:ss_num=>10,:ss_den=>20, :df_num=>2, :df_den=>10, @name=>"ANOVA for....")
class OneWay
  include Summarizable
  attr_reader :df_num, :df_den, :ss_num, :ss_den, :ms_num, :ms_den, :ms_total, :df_total, :ss_total
  # Name of ANOVA Analisys
  attr_accessor :name
  attr_accessor :name_denominator
  attr_accessor :name_numerator
  # Accepts either sums of squares (:ss_num, :ss_den) or mean squares
  # (:ms_num, :ms_den). Degrees of freedom :df_num and :df_den are
  # mandatory. Raises ArgumentError when a required pair is missing.
  def initialize(opts=Hash.new)
    @name=@name_numerator=@name_denominator=nil
    # First see if sum of squares or mean squares are entered
    raise ArgumentError, "You should set d.f." unless (opts.has_key? :df_num and opts.has_key? :df_den)
    @df_num=opts.delete :df_num
    @df_den=opts.delete :df_den
    @df_total=@df_num+@df_den
    if(opts.has_key? :ss_num and opts.has_key? :ss_den)
      @ss_num = opts.delete :ss_num
      @ss_den = opts.delete :ss_den
      @ms_num = @ss_num.quo(@df_num)
      @ms_den = @ss_den.quo(@df_den)
    elsif (opts.has_key? :ms_num and opts.has_key? :ms_den)
      @ms_num = opts.delete :ms_num
      @ms_den = opts.delete :ms_den
      @ss_num = @ms_num * @df_num
      # Fix: originally computed @ss_den*@df_den while @ss_den was still
      # nil, raising NoMethodError whenever only mean squares were given.
      @ss_den = @ms_den * @df_den
    else
      # Fix: previously fell through with nil squares and crashed below
      # with an opaque NoMethodError.
      raise ArgumentError, "You should set :ss_num/:ss_den or :ms_num/:ms_den"
    end
    @ss_total=@ss_num+@ss_den
    @ms_total=@ms_num+@ms_den
    # NOTE(review): the numerator is labeled "Unexplained variance" and the
    # denominator "Explained variance"; for a one-way ANOVA the numerator
    # (between groups) is conventionally the explained part — confirm intent.
    opts_default={:name=>"ANOVA",
      :name_denominator=>_("Explained variance"),
      :name_numerator=>_("Unexplained variance")}
    @opts=opts_default.merge(opts)
    # Fix: iterate the merged hash, not the raw opts — otherwise the
    # defaults (e.g. :name=>"ANOVA") were never applied to the accessors.
    @opts.keys.each {|k|
      send("#{k}=", @opts[k]) if self.respond_to? "#{k}="
    }
    @f_object=Statsample::Test::F.new(@ms_num, @ms_den, @df_num,@df_den)
  end
  # F value
  def f
    @f_object.f
  end
  # P-value of F test
  def probability
    @f_object.probability
  end
  def report_building(builder) #:nodoc:
    builder.section(:name=>@name) do |b|
      report_building_table(b)
    end
  end
  def report_building_table(builder) #:nodoc:
    builder.table(:name=>_("%s Table") % @name, :header=>%w{source ss df ms f p}.map {|v| _(v)}) do |t|
      t.row([@name_numerator, sprintf("%0.3f",@ss_num), @df_num, sprintf("%0.3f",@ms_num), sprintf("%0.3f",f), sprintf("%0.3f", probability)])
      t.row([@name_denominator, sprintf("%0.3f",@ss_den), @df_den, sprintf("%0.3f",@ms_den), "", ""])
      t.row([_("Total"), sprintf("%0.3f",@ss_total), @df_total, sprintf("%0.3f",@ms_total),"",""])
    end
  end
end
# One Way Anova with vectors
# Example:
# v1=[2,3,4,5,6].to_scale
# v2=[3,3,4,5,6].to_scale
# v3=[5,3,1,5,6].to_scale
# anova=Statsample::Anova::OneWayWithVectors.new([v1,v2,v3])
# anova.f
# => 0.0243902439024391
# anova.probability
# => 0.975953044203438
# anova.sst
# => 32.9333333333333
#
# Computes a one-way ANOVA directly from a set of data vectors,
# deriving the sums of squares and degrees of freedom itself.
class OneWayWithVectors < OneWay
# Show on summary Levene test
attr_accessor :summary_levene
# Show on summary descriptives for vectors
attr_accessor :summary_descriptives
# Show on summary of contrasts
attr_accessor :summary_contrasts
# Array with stored contrasts
attr_reader :contrasts
# Accepts an Array of vectors as the first argument, or a list of
# vectors optionally followed by an options Hash.
def initialize(*args)
if args[0].is_a? Array
@vectors=args.shift
else
@vectors=args.find_all {|v| v.is_a? Statsample::Vector}
opts=args.find {|v| v.is_a? Hash}
end
opts||=Hash.new
opts_default={:name=>_("Anova One-Way"),
:name_numerator=>_("Between Groups"),
:name_denominator=>_("Within Groups"),
:summary_descriptives=>false,
:summary_levene=>true,
:summary_contrasts=>true
}
# derived ss/df always win over anything passed in opts
@opts=opts_default.merge(opts).merge(:ss_num=>ssbg, :ss_den=>sswg, :df_num=>df_bg, :df_den=>df_wg)
@contrasts=[]
super(@opts)
end
alias :sst :ss_total
alias :msb :ms_num
alias :msw :ms_den
# Generates and store a contrast.
# Options should be provided as a hash
# [:c]=>contrast vector
# [:c1 - :c2]=>index for automatic construction of contrast
# [:name]=>contrast name
def contrast(opts=Hash.new)
name=opts[:name] || _("Contrast for %s") % @name
opts=opts.merge({:vectors=>@vectors, :name=>name})
c=Statsample::Anova::Contrast.new(opts)
@contrasts.push(c)
c
end
# Levene test for homogeneity of variances over the groups
def levene
Statsample::Test.levene(@vectors, :name=>_("Test of Homogeneity of variances (Levene)"))
end
# Total mean
def total_mean
sum=@vectors.inject(0){|a,v| a+v.sum}
sum.quo(n)
end
# Sum of squares within groups
def sswg
@sswg||=@vectors.inject(0) {|total,vector| total+vector.ss }
end
# Sum of squares between groups
def ssbg
m=total_mean
@vectors.inject(0) do |total,vector|
total + (vector.mean-m).square * vector.size
end
end
# Degrees of freedom within groups
def df_wg
# NOTE(review): memo variable @dk_wg looks like a typo for @df_wg;
# harmless, since it is only referenced here.
@dk_wg||=n-k
end
# Number of groups
def k
@k||=@vectors.size
end
# Degrees of freedom between groups
def df_bg
k-1
end
# Total number of cases
def n
@vectors.inject(0){|a,v| a+v.size}
end
def report_building(builder) # :nodoc:
builder.section(:name=>@name) do |s|
if summary_descriptives
s.table(:name=>_("Descriptives"),:header=>%w{Name N Mean SD Min Max}.map {|v| _(v)}) do |t|
@vectors.each do |v|
t.row [v.name, v.n_valid, "%0.4f" % v.mean, "%0.4f" % v.sd, "%0.4f" % v.min, "%0.4f" % v.max]
end
end
end
if summary_levene
s.parse_element(levene)
end
report_building_table(s)
if summary_contrasts and @contrasts.size>0
@contrasts.each do |c|
s.parse_element(c)
end
end
end
end
end
end
end
================================================
FILE: lib/statsample/anova/twoway.rb
================================================
module Statsample
module Anova
# = Generic Anova two-way.
# You could enter the sum of squares or the mean squares for a, b, axb and within.
# You should enter the degrees of freedom for a,b and within, because df_axb=df_a*df_b
# == Usage
# anova=Statsample::Anova::TwoWay(:ss_a=>10,:ss_b=>20,:ss_axb=>10, :ss_within=>20, :df_a=>2, :df_b=>3,df_within=100 @name=>"ANOVA for....")
class TwoWay
  include Summarizable
  attr_reader :df_a, :df_b, :df_axb, :df_within, :df_total
  attr_reader :ss_a, :ss_b, :ss_axb, :ss_within, :ss_total
  attr_reader :ms_a, :ms_b, :ms_axb, :ms_within, :ms_total
  # Name of ANOVA Analisys
  attr_accessor :name
  # Name of a factor
  attr_accessor :name_a
  # Name of b factor
  attr_accessor :name_b
  # Name of within factor
  attr_accessor :name_within
  attr_reader :f_a_object, :f_b_object, :f_axb_object
  # Accepts either all sums of squares (:ss_a, :ss_b, :ss_axb, :ss_within)
  # or all mean squares (:ms_a, :ms_b, :ms_axb, :ms_within).
  # Degrees of freedom :df_a, :df_b and :df_within are mandatory;
  # df_axb is derived as df_a*df_b.
  def initialize(opts=Hash.new)
    # First see if sum of squares or mean squares are entered
    raise ArgumentError, "You should set all d.f." unless [:df_a, :df_b, :df_within].all? {|v| opts.has_key? v}
    @df_a=opts.delete :df_a
    @df_b=opts.delete :df_b
    @df_axb=@df_a*@df_b
    @df_within=opts.delete :df_within
    @df_total=@df_a+@df_b+@df_axb+@df_within
    if [:ss_a, :ss_b, :ss_axb, :ss_within].all? {|v| opts.has_key? v}
      @ss_a = opts.delete :ss_a
      @ss_b = opts.delete :ss_b
      @ss_axb = opts.delete :ss_axb
      @ss_within = opts.delete :ss_within
      @ms_a =@ss_a.quo(@df_a)
      @ms_b =@ss_b.quo(@df_b)
      @ms_axb =@ss_axb.quo(@df_axb)
      @ms_within =@ss_within.quo(@df_within)
    elsif [:ms_a, :ms_b, :ms_axb, :ms_within].all? {|v| opts.has_key? v}
      @ms_a = opts.delete :ms_a
      @ms_b = opts.delete :ms_b
      @ms_axb = opts.delete :ms_axb
      @ms_within = opts.delete :ms_within
      @ss_a =@ms_a*@df_a
      @ss_b =@ms_b*@df_b
      @ss_axb =@ms_axb*@df_axb
      @ss_within =@ms_within*@df_within
    else
      # Fix: message read "all ss or ss"; it should mention both options
      raise "You should set all ss or all ms"
    end
    @ss_total=@ss_a+@ss_b+@ss_axb+@ss_within
    @ms_total=@ms_a+@ms_b+@ms_axb+@ms_within
    opts_default={:name=>_("ANOVA Two-Way"),
      :name_a=>_("A"),
      :name_b=>_("B"),
      :name_within=>_("Within")
    }
    @opts=opts_default.merge(opts)
    opts_default.keys.each {|k|
      send("#{k}=", @opts[k])
    }
    @f_a_object=Statsample::Test::F.new(@ms_a,@ms_within,@df_a,@df_within)
    @f_b_object=Statsample::Test::F.new(@ms_b,@ms_within,@df_b,@df_within)
    @f_axb_object=Statsample::Test::F.new(@ms_axb,@ms_within,@df_axb,@df_within)
  end
  # F statistic for factor A
  def f_a
    @f_a_object.f
  end
  # F statistic for factor B
  def f_b
    @f_b_object.f
  end
  # F statistic for the AxB interaction
  def f_axb
    @f_axb_object.f
  end
  def f_a_probability
    @f_a_object.probability
  end
  def f_b_probability
    @f_b_object.probability
  end
  def f_axb_probability
    @f_axb_object.probability
  end
  def report_building(builder) #:nodoc:
    builder.section(:name=>@name) do |b|
      report_building_table(b)
    end
  end
  def report_building_table(builder) #:nodoc:
    builder.table(:name=>_("%s Table") % @name, :header=>%w{source ss df ms f p}.map {|v| _(v)}) do |t|
      t.row([@name_a, "%0.3f" % @ss_a, @df_a, "%0.3f" % @ms_a , "%0.3f" % f_a, "%0.4f" % f_a_probability] )
      t.row([@name_b, "%0.3f" % @ss_b, @df_b, "%0.3f" % @ms_b , "%0.3f" % f_b, "%0.4f" % f_b_probability] )
      t.row(["%s X %s" % [@name_a, @name_b], "%0.3f" % @ss_axb, @df_axb, "%0.3f" % @ms_axb , "%0.3f" % f_axb, "%0.4f" % f_axb_probability] )
      t.row([@name_within, "%0.3f" % @ss_within, @df_within, nil,nil,nil] )
      t.row([_("Total"), "%0.3f" % @ss_total, @df_total, nil,nil,nil] )
    end
  end
end
# Two Way Anova with vectors
# Example:
# v1=[1,1,2,2].to_scale
# v2=[1,2,1,2].to_scale
# v3=[5,3,1,5].to_scale
# anova=Statsample::Anova::TwoWayWithVectors.new(:a=>v1,:b=>v2, :dependent=>v3)
#
# Computes a balanced (equal cell size) two-way ANOVA directly from
# the factor vectors :a, :b and the :dependent vector.
class TwoWayWithVectors < TwoWay
# Show summary Levene test
attr_accessor :summary_levene
# Show summary descriptives for variables (means)
attr_accessor :summary_descriptives
attr_reader :a_var, :b_var, :dep_var
# For now, only equal sample cells allowed
def initialize(opts=Hash.new)
raise "You should insert at least :a, :b and :dependent" unless [:a, :b, :dependent].all? {|v| opts.has_key? v}
@a_var='a'
@b_var='b'
@dep_var='dependent'
# listwise deletion of missing data across the three vectors
@a_vector, @b_vector, @dep_vector=Statsample.only_valid_clone opts[:a], opts[:b], opts[:dependent]
ds={@a_var=>@a_vector, @b_var=>@b_vector, @dep_var=>@dep_vector}.to_dataset
@ds=ds.clone_only_valid
_p=@a_vector.factors.size
_q=@b_vector.factors.size
@x_general=@dep_vector.mean
# cell means and sd, keyed by [a_level, b_level]
@axb_means={}
@axb_sd={}
@vectors=[]
n=nil
@ds.to_multiset_by_split(a_var,b_var).each_vector(dep_var) {|k,v|
@axb_means[k]=v.mean
@axb_sd[k]=v.sd
@vectors << v
n||=v.size
raise "All cell sizes should be equal" if n!=v.size
}
# marginal means per level of A and per level of B
@a_means={}
@ds.to_multiset_by_split(a_var).each_vector(dep_var) {|k,v|
@a_means[k]=v.mean
}
@b_means={}
@ds.to_multiset_by_split(b_var).each_vector(dep_var) {|k,v|
@b_means[k]=v.mean
}
# main effect sums of squares from marginal means
ss_a=n*_q*@ds[a_var].factors.inject(0) {|ac,v|
ac+(@a_means[v]-@x_general)**2
}
ss_b=n*_p*@ds[b_var].factors.inject(0) {|ac,v|
ac+(@b_means[v]-@x_general)**2
}
# within (error) sum of squares: deviation of each case from its cell mean
ss_within=@ds.collect {|row|
(row[dep_var]-@axb_means[[row[a_var],row[b_var]]])**2
}.sum
# interaction sum of squares from cell vs marginal means
ss_axb=n*@axb_means.inject(0) {|ac,v|
j,k=v[0]
xjk=v[1]
ac+(xjk-@a_means[j]-@b_means[k]+@x_general)**2
}
df_a=_p-1
df_b=_q-1
df_within=(_p*_q)*(n-1)
opts_default={:name=>_("Anova Two-Way on %s") % @ds[dep_var].name,
:name_a=>@ds[a_var].name,
:name_b=>@ds[b_var].name,
:summary_descriptives=>true,
:summary_levene=>false}
@opts=opts_default.merge(opts).merge({:ss_a=>ss_a,:ss_b=>ss_b, :ss_axb=>ss_axb, :ss_within=>ss_within, :df_a=>df_a, :df_b=>df_b, :df_within=>df_within})
super(@opts)
end
# Levene test for homogeneity of variances over the cells
def levene
Statsample::Test.levene(@vectors, :name=>_("Test of Homogeneity of variances (Levene)"))
end
def report_building(builder) #:nodoc:#
builder.section(:name=>@name) do |s|
if summary_descriptives
# cell means table: B levels as rows, A levels as columns
s.table(:header =>['']+@ds[a_var].factors.map {|a| @ds[a_var].labeling(a)}+[_("%s Mean") % @name_b]) do |t|
@ds[b_var].factors.each do |b|
t.row([@ds[b_var].labeling(b)]+@ds[a_var].factors.map {|a| "%0.3f" % @axb_means[[a,b]] } + ["%0.3f" % @b_means[b]])
end
t.row([_("%s Mean") % @name_a]+@ds[a_var].factors.map {|a| "%0.3f" % @a_means[a]}+ ["%0.3f" % @x_general])
end
end
if summary_levene
s.parse_element(levene)
end
report_building_table(s)
end
end
end
end
end
================================================
FILE: lib/statsample/anova.rb
================================================
module Statsample
  module Anova
    # Convenience constructors, so callers can write
    # Statsample::Anova.oneway(...) instead of naming the class directly.
    def self.oneway(*args)
      OneWay.new(*args)
    end
    def self.twoway(*args)
      TwoWay.new(*args)
    end
    def self.oneway_with_vectors(*args)
      OneWayWithVectors.new(*args)
    end
    def self.twoway_with_vectors(*args)
      TwoWayWithVectors.new(*args)
    end
  end
end
require 'statsample/anova/oneway'
require 'statsample/anova/contrast'
require 'statsample/anova/twoway'
================================================
FILE: lib/statsample/bivariate/pearson.rb
================================================
module Statsample
  module Bivariate
    # = Pearson correlation coefficient (r)
    #
    # The moment-product Pearson's correlation coefficient, known as 'r'
    # is a measure of bivariate associate between two continous
    # variables.
    #
    # == Usage
    #   a = [1,2,3,4,5,6].to_scale
    #   b = [2,3,4,5,6,7].to_scale
    #   pearson = Statsample::Bivariate::Pearson.new(a,b)
    #   puts pearson.r
    #   puts pearson.t
    #   puts pearson.probability
    #   puts pearson.summary
    #
    class Pearson
      include Statsample::Test
      include Summarizable
      # Name of correlation
      attr_accessor :name
      # Tails for probability (:both, :left or :right)
      attr_accessor :tails
      # Number of cases after listwise deletion of missing data
      attr_accessor :n
      def initialize(v1,v2,opts=Hash.new)
        @v1_name,@v2_name = v1.name,v2.name
        @v1,@v2 = Statsample.only_valid_clone(v1,v2)
        @n=@v1.size
        opts_default={
          :name=>_("Correlation (%s - %s)") % [@v1_name, @v2_name],
          :tails=>:both
        }
        # Fix: user options must override the defaults. The original
        # opts.merge(opts_default) silently discarded any :name or
        # :tails supplied by the caller.
        @opts=opts_default.merge(opts)
        @opts.each{|k,v|
          self.send("#{k}=",v) if self.respond_to? "#{k}="
        }
      end
      # Pearson product-moment r between the two vectors.
      def r
        Statsample::Bivariate.pearson(@v1,@v2)
      end
      # t statistic for H0: rho=0, with n-2 degrees of freedom.
      def t
        Statsample::Bivariate.t_pearson(@v1,@v2)
      end
      # p-value for the t statistic, honoring +tails+.
      def probability
        p_using_cdf(Distribution::T.cdf(t, @v1.size-2), tails)
      end
      def report_building(builder)
        builder.text(_("%s : r=%0.3f (t:%0.3f, g.l.=%d, p:%0.3f / %s tails)") % [@name, r,t, (n-2), probability, tails])
      end
    end
  end
end
================================================
FILE: lib/statsample/bivariate.rb
================================================
require 'statsample/bivariate/pearson'
module Statsample
# Diverse methods and classes to calculate bivariate relations
# Specific classes:
# * Statsample::Bivariate::Pearson : Pearson correlation coefficient (r)
# * Statsample::Bivariate::Tetrachoric : Tetrachoric correlation
# * Statsample::Bivariate::Polychoric : Polychoric correlation (using joint, two-step and polychoric series)
module Bivariate
autoload(:Polychoric, 'statsample/bivariate/polychoric')
autoload(:Tetrachoric, 'statsample/bivariate/tetrachoric')
class << self
# Covariance between two vectors, computed over pairwise-valid data.
# Returns nil when no valid pair exists. Uses GSL when available,
# otherwise the pure-ruby fallback.
def covariance(v1,v2)
  a, b = Statsample.only_valid_clone(v1, v2)
  return nil if a.size == 0
  if Statsample.has_gsl?
    GSL::Stats::covariance(a.gsl, b.gsl)
  else
    covariance_slow(a, b)
  end
end
# Log-likelihood of dichotomous outcomes +real+ (0/1) under predicted
# probabilities +pred+; computed over pairwise-valid data.
def maximum_likehood_dichotomic(pred,real)
  p_v, r_v = Statsample.only_valid_clone(pred, real)
  total = 0
  p_v.each_index do |i|
    total += (r_v[i] * Math.log(p_v[i])) + ((1 - r_v[i]) * Math.log(1 - p_v[i]))
  end
  total
end
# Pure-ruby covariance: sum of cross-deviations over (n-1).
# NOTE(review): uses Statsample.only_valid, unlike sibling methods which
# use only_valid_clone — confirm the difference is intentional.
def covariance_slow(v1,v2) # :nodoc:
v1a,v2a=Statsample.only_valid(v1,v2)
sum_of_squares(v1a,v2a) / (v1a.size-1)
end
# Sum of cross-products of deviations from the means, over
# pairwise-valid data: sum((x_i - mx)*(y_i - my)).
def sum_of_squares(v1,v2)
  a, b = Statsample.only_valid_clone(v1, v2)
  mean_a = a.mean
  mean_b = b.mean
  acc = 0
  a.size.times { |i| acc += (a[i] - mean_a) * (b[i] - mean_b) }
  acc
end
# Pearson correlation coefficient (r) between two vectors, over
# pairwise-valid data. Returns nil when no valid pair exists.
# Uses GSL when available, otherwise the pure-ruby fallback.
def pearson(v1,v2)
  a, b = Statsample.only_valid_clone(v1, v2)
  return nil if a.size == 0
  if Statsample.has_gsl?
    GSL::Stats::correlation(a.gsl, b.gsl)
  else
    pearson_slow(a, b)
  end
end
# Pure-ruby Pearson r: cross-product sum over the product of the
# vectors' own root sums of squares.
def pearson_slow(v1,v2) # :nodoc:
  a, b = Statsample.only_valid_clone(v1, v2)
  numerator = sum_of_squares(a, b)
  denominator = Math.sqrt(a.sum_of_squares) * Math.sqrt(b.sum_of_squares)
  numerator.quo(denominator)
end
alias :correlation :pearson
# Retrieves the value for t test for a pearson correlation
# between two vectors, to test the null hypothesis of r=0.
def t_pearson(v1,v2)
  v1a,v2a=Statsample.only_valid_clone(v1,v2)
  r=pearson(v1a,v2a)
  # A perfect correlation makes the t denominator (1-r^2) zero. The
  # original guarded only r==1.0 (returning 0); r==-1.0 fell through
  # to t_r and produced an infinite value. Guard both signs.
  if r.abs==1.0
    0
  else
    t_r(r,v1a.size)
  end
end
# t statistic for a Pearson correlation, given r and the sample size:
# t = r * sqrt((n-2) / (1 - r^2)).
# Source : http://faculty.chass.ncsu.edu/garson/PA765/correl.htm
def t_r(r, size)
  df = size - 2
  r * Math.sqrt(df.to_f / (1 - r**2))
end
# Probability value (a la SPSS) for a given t, sample size and tails.
# Tails accepts:
# * :both or 2 : for r!=0 (default)
# * :right, :positive or 1 : for r > 0
# * :left, :negative : for r < 0
def prop_pearson(t, size, tails=:both)
  tails = :both  if tails == 2
  tails = :right if tails == 1 or tails == :positive
  tails = :left  if tails == :negative
  n_tails = (tails == :both) ? 2 : 1
  # For two-tailed tests, work on the negative side of the distribution.
  t = -t if t > 0 and tails == :both
  cdf = Distribution::T.cdf(t, size - 2)
  (tails == :right) ? 1.0 - (cdf * n_tails) : cdf * n_tails
end
# Predicted time for a pairwise correlation matrix, in milliseconds.
# Empirical regression; see benchmarks/correlation_matrix.rb for the
# mode of calculation.
def prediction_pairwise(vars, cases)
  base = -0.518111 - 0.000746 * cases + 1.235608 * vars + 0.000740 * cases * vars
  (base ** 2) / 100
end
# Predicted time for an optimized (GSL) correlation matrix, in
# milliseconds. Empirical regression; see
# benchmarks/correlation_matrix.rb for the mode of calculation.
def prediction_optimized(vars, cases)
  base = 4 + 0.018128 * cases + 0.246871 * vars + 0.001169 * vars * cases
  (base ** 2) / 100
end
# Returns residual scores of +from+ after removing the variance shared
# with +del+: both vectors are standardized and r*del is subtracted.
# Positions where either vector is nil stay nil.
def residuals(from,del)
  r = Statsample::Bivariate.pearson(from, del)
  from_std, del_std = from.vector_standarized, del.vector_standarized
  out = []
  from_std.data_with_nils.each_index do |i|
    if from_std[i].nil? or del_std[i].nil?
      out << nil
    else
      out << (from_std[i] - r * del_std[i])
    end
  end
  out.to_vector(:scale)
end
# First-order partial correlation between v1 and v2, controlling for
# the effect of +control+ on both.
def partial_correlation(v1,v2,control)
  a, b, c = Statsample.only_valid_clone(v1, v2, control)
  r_ab = pearson(a, b)
  r_ac = pearson(a, c)
  r_bc = pearson(b, c)
  numerator = r_ab - (r_ac * r_bc)
  denominator = Math.sqrt(1 - r_ac**2) * Math.sqrt(1 - r_bc**2)
  numerator.quo(denominator)
end
# GSL-based covariance matrix: S = Xc'*Xc / (n-1), where Xc is the
# column-centered data matrix. Requires a dataset with no missing data
# (callers check this before choosing this path).
def covariance_matrix_optimized(ds)
x=ds.to_gsl
n=x.row_size
m=x.column_size
# Row vector of column means: (1/n) * ones(1,n) * X
means=((1/n.to_f)*GSL::Matrix.ones(1,n)*x).row(0)
# Center each column by subtracting its mean.
centered=x-(GSL::Matrix.ones(n,m)*GSL::Matrix.diag(means))
ss=centered.transpose*centered
s=((1/(n-1).to_f))*ss
s
end
# Covariance matrix for a dataset.
# Order of rows and columns depends on Dataset#fields order.
# Chooses the GSL path when there is no missing data and the benchmark
# regressions predict it to be faster.
def covariance_matrix(ds)
  vars  = ds.fields.size
  cases = ds.cases
  use_optimized = !ds.has_missing_data? and Statsample.has_gsl? and
    prediction_optimized(vars, cases) < prediction_pairwise(vars, cases)
  cm = use_optimized ? covariance_matrix_optimized(ds) : covariance_matrix_pairwise(ds)
  cm.extend(Statsample::CovariateMatrix)
  cm.fields = ds.fields
  cm
end
# Pairwise covariance matrix: nil for non-scale fields, variance on the
# diagonal, and each off-diagonal covariance computed once (the
# symmetric cell is served from the cache).
def covariance_matrix_pairwise(ds)
  computed = {}
  ds.collect_matrix do |row, col|
    if ds[row].type != :scale or ds[col].type != :scale
      nil
    elsif row == col
      ds[row].variance
    elsif computed[[col, row]].nil?
      computed[[row, col]] = covariance(ds[row], ds[col])
    else
      computed[[col, row]]
    end
  end
end
# Correlation matrix for a dataset.
# Order of rows and columns depends on Dataset#fields order.
# Chooses the GSL path when there is no missing data and the benchmark
# regressions predict it to be faster.
def correlation_matrix(ds)
  vars  = ds.fields.size
  cases = ds.cases
  use_optimized = !ds.has_missing_data? and Statsample.has_gsl? and
    prediction_optimized(vars, cases) < prediction_pairwise(vars, cases)
  cm = use_optimized ? correlation_matrix_optimized(ds) : correlation_matrix_pairwise(ds)
  cm.extend(Statsample::CovariateMatrix)
  cm.fields = ds.fields
  cm
end
# GSL-based correlation matrix: scale the covariance matrix by the
# inverse standard deviations on both sides (D^-1 * S * D^-1).
def correlation_matrix_optimized(ds)
s=covariance_matrix_optimized(ds)
# Diagonal matrix of 1/sd for each variable.
sds=GSL::Matrix.diagonal(s.diagonal.sqrt.pow(-1))
cm=sds*s*sds
# Fix diagonal: force exact 1.0 against floating-point drift.
s.row_size.times {|i|
cm[i,i]=1.0
}
cm
end
# Pairwise correlation matrix: 1.0 on the diagonal, nil for non-scale
# fields, and each off-diagonal r computed once (the symmetric cell is
# served from the cache).
def correlation_matrix_pairwise(ds)
  computed = {}
  ds.collect_matrix do |row, col|
    if row == col
      1.0
    elsif ds[row].type != :scale or ds[col].type != :scale
      nil
    elsif computed[[col, row]].nil?
      computed[[row, col]] = pearson(ds[row], ds[col])
    else
      computed[[col, row]]
    end
  end
end
# Matrix with the number of valid pairwise cases for each pair of
# fields; the diagonal holds each field's own valid count.
def n_valid_matrix(ds)
  ds.collect_matrix do |row, col|
    if row == col
      ds[row].valid_data.size
    else
      valid_row, _valid_col = Statsample.only_valid_clone(ds[row], ds[col])
      valid_row.size
    end
  end
end
# Matrix of correlation probabilities.
# Order of rows and columns depends on Dataset#fields order; the
# diagonal and non-scale fields are nil.
def correlation_probability_matrix(ds, tails=:both)
  fields = ds.fields
  rows = fields.collect do |row|
    fields.collect do |col|
      valid_a, _valid_b = Statsample.only_valid_clone(ds[row], ds[col])
      if row == col or ds[row].type != :scale or ds[col].type != :scale
        nil
      else
        prop_pearson(t_pearson(ds[row], ds[col]), valid_a.size, tails)
      end
    end
  end
  Matrix.rows(rows)
end
# Spearman rank correlation coefficient (rho) between two vectors:
# Pearson r computed on the ranks of the pairwise-valid data.
def spearman(v1,v2)
  a, b = Statsample.only_valid_clone(v1, v2)
  pearson(a.ranked(:scale), b.ranked(:scale))
end
# Calculate Point biserial correlation. Equal to Pearson correlation, with
# one dichotomous value replaced by "0" and the other by "1".
# Raises TypeError unless the first vector has exactly two factors and
# the second is a scale vector.
def point_biserial(dichotomous,continous)
ds={'d'=>dichotomous,'c'=>continous}.to_dataset.dup_only_valid
raise(TypeError, "First vector should be dichotomous") if ds['d'].factors.size!=2
raise(TypeError, "Second vector should be continous") if ds['c'].type!=:scale
# Partition the continuous values by the lower dichotomous factor.
f0=ds['d'].factors.sort[0]
m0=ds.filter_field('c') {|c| c['d']==f0}
m1=ds.filter_field('c') {|c| c['d']!=f0}
# r_pb = ((M1 - M0) / sd_p) * sqrt(n0*n1 / n^2), using the population sd.
((m1.mean-m0.mean).to_f / ds['c'].sdp) * Math::sqrt(m0.size*m1.size.to_f / ds.cases**2)
end
# Kendall Rank Correlation Coefficient (Tau a)
# Based on Hervé Abdi's article.
def tau_a(v1,v2)
  v1a,v2a=Statsample.only_valid_clone(v1,v2)
  # Use the size of the pairwise-valid vectors. The previous code used
  # v1.size, which overstates n (and deflates tau) whenever
  # only_valid_clone drops missing cases.
  n=v1a.size
  v1r,v2r=v1a.ranked(:scale),v2a.ranked(:scale)
  o1=ordered_pairs(v1r)
  o2=ordered_pairs(v2r)
  # delta counts the pairs ordered differently in the two rankings.
  delta= o1.size*2-(o2 & o1).size*2
  1-(delta * 2 / (n*(n-1)).to_f)
end
# Calculates Kendall's tau-b from a contingency table (rows and columns
# must be ordered). Tau-b defines perfect association as strict
# monotonicity; it penalizes ties less than some other measures.
# == Reference
# Mielke, P. GOODMAN-KRUSKAL TAU AND GAMMA.
# Source: http://faculty.chass.ncsu.edu/garson/PA765/assocordinal.htm
def tau_b(matrix)
  counts = pairs(matrix)
  numerator = (counts['P'] - counts['Q']).to_f
  denominator = Math.sqrt(
    (counts['P'] + counts['Q'] + counts['Y']) *
    (counts['P'] + counts['Q'] + counts['X'])
  ).to_f
  numerator / denominator
end
# Calculates Goodman and Kruskal's gamma from a contingency table:
# the surplus of concordant pairs over discordant pairs, as a
# proportion of all pairs, ignoring ties.
# Source: http://faculty.chass.ncsu.edu/garson/PA765/assocordinal.htm
def gamma(matrix)
  counts = pairs(matrix)
  (counts['P'] - counts['Q']).to_f / (counts['P'] + counts['Q']).to_f
end
# Counts pair types on an ordered contingency table. Returns a hash
# with 'P' (concordant), 'Q' (discordant), 'X' (ties on the row
# variable) and 'Y' (ties on the column variable).
def pairs(matrix)
  rows = matrix.row_size
  cols = matrix.column_size
  concordant = 0
  discordant = 0
  ties_x = 0
  ties_y = 0
  # Concordant pairs: both row and column index increase.
  (0...rows - 1).each do |x|
    (0...cols - 1).each do |y|
      ((x + 1)...rows).each do |x2|
        ((y + 1)...cols).each do |y2|
          concordant += matrix[x, y] * matrix[x2, y2]
        end
      end
    end
  end
  # Discordant pairs: row index increases while column index decreases.
  (0...rows - 1).each do |x|
    (1...cols).each do |y|
      ((x + 1)...rows).each do |x2|
        (0...y).each do |y2|
          discordant += matrix[x, y] * matrix[x2, y2]
        end
      end
    end
  end
  # Ties on X: same column, different rows.
  (0...rows - 1).each do |x|
    (0...cols).each do |y|
      ((x + 1)...rows).each do |x2|
        ties_x += matrix[x, y] * matrix[x2, y]
      end
    end
  end
  # Ties on Y: same row, different columns.
  (0...rows).each do |x|
    (0...cols - 1).each do |y|
      ((y + 1)...cols).each do |y2|
        ties_y += matrix[x, y] * matrix[x, y2]
      end
    end
  end
  { 'P' => concordant, 'Q' => discordant, 'Y' => ties_y, 'X' => ties_x }
end
# All ordered pairs [d_i, d_j] (i < j) of the vector's data,
# preserving original order.
def ordered_pairs(vector)
  data = vector.data
  result = []
  data.each_index do |i|
    ((i + 1)...data.size).each do |j|
      result.push([data[i], data[j]])
    end
  end
  result
end
=begin
def sum_of_codeviated(v1,v2)
v1a,v2a=Statsample.only_valid(v1,v2)
sum=0
(0...v1a.size).each{|i|
sum+=v1a[i]*v2a[i]
}
sum-((v1a.sum*v2a.sum) / v1a.size.to_f)
end
=end
# Minimum number of pairwise-valid cases for a covariate matrix based
# on a dataset (never larger than the dataset's case count).
def min_n_valid(ds)
  m = n_valid_matrix(ds)
  minimum = ds.cases
  m.row_size.times do |x|
    m.column_size.times do |y|
      minimum = m[x, y] if m[x, y] < minimum
    end
  end
  minimum
end
end
end
end
================================================
FILE: lib/statsample/codification.rb
================================================
require 'yaml'
module Statsample
# This module aids to code open questions
# * Select one or more vectors of a dataset, to create a yaml files, on which each vector is a hash, which keys and values are the vector's factors . If data have Statsample::SPLIT_TOKEN on a value, each value will be separated on two or more hash keys.
# * Edit the yaml and replace the values of hashes with your codes. If you need to create two or mores codes for an answer, use the separator (default Statsample::SPLIT_TOKEN)
# * Recode the vectors, loading the yaml file:
# * recode_dataset_simple!() : The new vectors have the same name of the original plus "_recoded"
# * recode_dataset_split!() : Create equal number of vectors as values. See Vector.add_vectors_by_split() for arguments
#
# Usage:
# recode_file="recodification.yaml"
# phase=:first # flag
# if phase==:first
# File.open(recode_file,"w") {|fp|
# Statsample::Codification.create_yaml(ds,%w{vector1 vector2}, ",",fp)
# }
# # Edit the file recodification.yaml and verify changes
# elsif phase==:second
# File.open(recode_file,"r") {|fp|
# Statsample::Codification.verify(fp,['vector1'])
# }
# # Add new vectors to the dataset
# elsif phase==:third
# File.open(recode_file,"r") {|fp|
# Statsample::Codification.recode_dataset_split!(ds,fp,"*")
# }
# end
#
module Codification
class << self
# Create a hash, based on vectors, to create the dictionary.
# The keys are vector names on the dataset and the values are hashes
# with keys == values (identity mapping), ready for recodification.
def create_hash(dataset, vectors, sep=Statsample::SPLIT_TOKEN)
  raise ArgumentError,"Array should't be empty" if vectors.size==0
  vectors.inject({}) do |acc, v_name|
    raise Exception, "Vector #{v_name} doesn't exists on Dataset" unless dataset.fields.include? v_name
    # Split multi-valued answers, stringify and drop nils.
    values = dataset[v_name].splitted(sep).flatten.collect { |c| c.to_s }.find_all { |c| !c.nil? }
    acc[v_name] = values.uniq.compact.sort.inject({}) { |h, val| h[val] = val; h }
    acc
  end
end
# Create a yaml dictionary based on vectors.
# The keys are vector names on the dataset and the values are hashes
# with keys == values, for recodification.
#
#   v1=%w{a,b b,c d}.to_vector
#   ds={"v1"=>v1}.to_dataset
#   Statsample::Codification.create_yaml(ds,['v1'])
#   => "--- \nv1: \n  a: a\n  b: b\n  c: c\n  d: d\n"
def create_yaml(dataset, vectors, io=nil, sep=Statsample::SPLIT_TOKEN)
  dictionary_hash = create_hash(dataset, vectors, sep)
  YAML.dump(dictionary_hash, io)
end
# Create an excel file to build a dictionary, based on vectors.
# Raises an error if +filename+ already exists.
# The columns are:
# * field: name of vector
# * original: original value
# * recoded: new code
def create_excel(dataset, vectors, filename, sep=Statsample::SPLIT_TOKEN)
  require 'spreadsheet'
  if File.exist?(filename)
    # The previous message contained a broken interpolation and typos
    # ("Exists a file named #(unknown). Delete ir before overwrite.").
    raise "A file named #{filename} already exists. Delete it before overwriting."
  end
  book = Spreadsheet::Workbook.new
  sheet = book.create_worksheet
  sheet.row(0).concat(%w{field original recoded})
  i=1
  create_hash(dataset, vectors, sep).sort.each do |field, inner_hash|
    inner_hash.sort.each do |k,v|
      # dup: spreadsheet mutates cell strings, so avoid sharing.
      sheet.row(i).concat([field.dup,k.dup,v.dup])
      i+=1
    end
  end
  book.write(filename)
end
# From an excel file generates a dictionary hash
# to use on recode_dataset_simple!() or recode_dataset_split!().
# Expects columns: field, original, recoded; the first row (header)
# and incomplete rows are skipped.
def excel_to_recoded_hash(filename)
  require 'spreadsheet'
  result = {}
  book = Spreadsheet.open filename
  sheet = book.worksheet 0
  row_index = 0
  sheet.each do |row|
    row_index += 1
    next if row_index == 1 or row[0].nil? or row[1].nil? or row[2].nil?
    (result[row[0]] ||= {})[row[1]] = row[2]
  end
  result
end
# Inverts a {original => "code1<sep>code2"} hash into
# {code => [originals...]}, splitting multi-code values on +sep+.
def inverse_hash(h, sep=Statsample::SPLIT_TOKEN)
  result = {}
  h.each do |original, joined_codes|
    joined_codes.split(sep).each do |code|
      (result[code] ||= []).push(original)
    end
  end
  result
end
# Turns {original => "code1<sep>code2"} into {original => [codes...]}.
def dictionary(h, sep=Statsample::SPLIT_TOKEN)
  result = {}
  h.each { |original, joined| result[original] = joined.split(sep) }
  result
end
# Recodes a vector using the dictionary hash +h+: each split value is
# mapped through the dictionary; nil entries stay nil. Returns an array
# of arrays of codes (duplicates removed).
def recode_vector(v,h,sep=Statsample::SPLIT_TOKEN)
  dict = dictionary(h, sep)
  v.splitted(sep).collect do |values|
    if values.nil?
      nil
    else
      values.collect { |value| dict[value] }.flatten.uniq
    end
  end
end
# Recodes every vector named in +dictionary_hash+, adding a new vector
# "<name>_recoded" to the dataset for each.
def recode_dataset_simple!(dataset, dictionary_hash ,sep=Statsample::SPLIT_TOKEN)
_recode_dataset(dataset,dictionary_hash ,sep,false)
end
# Recodes every vector named in +dictionary_hash+, creating one vector
# per distinct code ("<name>_<code>"). See Vector#split_by_separator.
def recode_dataset_split!(dataset, dictionary_hash, sep=Statsample::SPLIT_TOKEN)
_recode_dataset(dataset, dictionary_hash, sep,true)
end
# Shared implementation for recode_dataset_simple! and
# recode_dataset_split!: recodes each vector named in +h+ and stores
# the result back on the dataset.
def _recode_dataset(dataset, h , sep=Statsample::SPLIT_TOKEN, split=false)
  h.keys.each do |v_name|
    raise Exception, "Vector #{v_name} doesn't exists on Dataset" unless dataset.fields.include? v_name
    recoded = recode_vector(dataset[v_name], h[v_name], sep).collect do |codes|
      codes.nil? ? nil : codes.join(sep)
    end.to_vector
    if split
      # One new vector per distinct code.
      recoded.split_by_separator(sep).each do |code, vector|
        dataset[v_name + "_" + code] = vector
      end
    else
      dataset[v_name + "_recoded"] = recoded
    end
  end
end
# Prints, for each field, every code with the original answers mapped
# to it (most frequent first), so the dictionary can be verified by eye.
def verify(h, v_names=nil,sep=Statsample::SPLIT_TOKEN,io=$>)
  require 'pp'
  fields = v_names || h.keys
  fields.each do |v_name|
    inverse = inverse_hash(h[v_name], sep)
    io.puts "- Field: #{v_name}"
    ordered = inverse.sort { |a, b| -(a[1].count <=> b[1].count) }
    ordered.each do |code, originals|
      io.puts "  - \"#{code}\" (#{originals.count}) :\n    -'"+originals.join("\n    -'")+"'"
    end
  end
end
end
end
end
================================================
FILE: lib/statsample/converter/csv.rb
================================================
module Statsample
class CSV < SpreadsheetBase
if RUBY_VERSION<"1.9"
require 'fastercsv'
CSV_klass=::FasterCSV
else
require 'csv'
CSV_klass=::CSV
end
class << self
# Ruby 1.9 CSV reader: returns a Dataset with one Vector per column.
# By default the first line is treated as the header, but the caller
# may pass :headers=>false in +csv_opts+.
def read19(filename,ignore_lines=0,csv_opts=Hash.new)
  # Defaults must not clobber caller options: the previous merge!
  # forced :headers=>true, which both mutated the caller's hash and
  # made the no-header branch below unreachable.
  csv_opts={:headers=>true, :header_converters => :symbol}.merge(csv_opts)
  csv = CSV_klass::Table.new(CSV_klass::read(filename,'r',csv_opts))
  csv_headers = if csv_opts[:headers]
    csv.headers
  else
    # As in R: with no header row, name the columns V1,V2,V3,...
    1.upto(csv.first.length).collect { |i| "V#{i}" }
  end
  # Invert row -> column: csv[0] is the first column, similar to R.
  csv.by_col!
  thash = {}
  csv_headers.each_with_index do |header,idx|
    thash[header] = Statsample::Vector.new(csv[idx].drop(ignore_lines))
  end
  Statsample::Dataset.new(thash)
end
# Returns a Dataset based on a csv file
#
# USE:
#     ds=Statsample::CSV.read("test_csv.csv")
def read(filename, empty=[''],ignore_lines=0,csv_opts=Hash.new)
  first_row=true
  fields=[]
  ds=nil
  line_number=0
  csv=CSV_klass.open(filename,'rb', csv_opts)
  begin
    csv.each do |row|
      line_number+=1
      next if line_number<=ignore_lines  # skip requested leading lines
      row.collect!{|c| c.to_s }
      if first_row
        # First (non-ignored) row supplies the field names.
        fields=extract_fields(row)
        ds=Statsample::Dataset.new(fields)
        first_row=false
      else
        rowa=process_row(row,empty)
        ds.add_case(rowa,false)
      end
    end
  ensure
    # The handle was previously never closed, leaking the descriptor.
    csv.close
  end
  convert_to_scale_and_date(ds,fields)
  ds.update_valid_data
  ds
end
# Save a Dataset on a csv file
#
# USE:
#     Statsample::CSV.write(ds,"test_csv.csv")
# With +convert_comma+, decimal points are written as commas
# (for locales that use "," as the decimal separator).
def write(dataset,filename, convert_comma=false,*opts)
  # Block form guarantees the file is closed even if a row raises;
  # the previous explicit open/close leaked the handle on error.
  CSV_klass.open(filename,'w',*opts) do |writer|
    writer << dataset.fields
    dataset.each_array do |row|
      if convert_comma
        row.collect!{|v| v.to_s.gsub(".",",")}
      end
      writer << row
    end
  end
end
end
end
end
================================================
FILE: lib/statsample/converter/spss.rb
================================================
module Statsample
module SPSS
class << self
# Export a SPSS MATRIX DATA command with tetrachoric correlations.
#
# Use:
#   ds=Statsample::Excel.read("my_data.xls")
#   puts Statsample::SPSS.tetrachoric_correlation_matrix(ds)
def tetrachoric_correlation_matrix(ds)
dsv=ds.dup_only_valid
# Delete all vectors without variation (a single factor), since a
# tetrachoric correlation is undefined for them; dichotomize the rest.
dsv.fields.each{|f|
if dsv[f].factors.size==1
dsv.delete_vector(f)
else
dsv[f]=dsv[f].dichotomize
end
}
tcm=Statsample::Bivariate.tetrachoric_correlation_matrix(dsv)
# Per-variable N, mean and sd lines required by MATRIX DATA.
n=dsv.fields.collect {|f|
sprintf("%d",dsv[f].size)
}
meanlist=dsv.fields.collect{|f|
sprintf("%0.3f", dsv[f].mean)
}
stddevlist=dsv.fields.collect{|f|
sprintf("%0.3f", dsv[f].sd)
}
out=<<-HEREDOC
MATRIX DATA VARIABLES=ROWTYPE_ #{dsv.fields.join(",")}.
BEGIN DATA
N #{n.join(" ")}
MEAN #{meanlist.join(" ")}
STDDEV #{stddevlist.join(" ")}
HEREDOC
# Lower triangle of the correlation matrix, one CORR line per row.
tcm.row_size.times {|i|
out +="CORR "
(i+1).times {|j|
out+=sprintf("%0.3f",tcm[i,j])+" "
}
out +="\n"
}
out+="END DATA.\nEXECUTE.\n"
end
end
end
end
================================================
FILE: lib/statsample/converters.rb
================================================
require 'statsample/converter/spss'
module Statsample
# Create and dumps Datasets on a database
module Database
class << self
# Read a database query and return a Dataset.
#
# USE:
#
#   dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
#   Statsample.read(dbh, "SELECT * FROM test")
#
def read(dbh,query)
  require 'dbi'
  sth = dbh.execute(query)
  vectors = {}
  fields = []
  sth.column_info.each do |column|
    name = column['name']
    vector = Statsample::Vector.new([])
    vector.name = name
    # INTEGER/DOUBLE columns become scale vectors; anything else nominal.
    vector.type = (column['type_name']=='INTEGER' or column['type_name']=='DOUBLE') ? :scale : :nominal
    vectors[name] = vector
    fields.push(name)
  end
  ds = Statsample::Dataset.new(vectors, fields)
  sth.fetch { |row| ds.add_case(row.to_a, false) }
  ds.update_valid_data
  ds
end
# Insert each case of the Dataset on the selected table.
#
# USE:
#
#   ds={'id'=>[1,2,3].to_vector, 'name'=>["a","b","c"].to_vector}.to_dataset
#   dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
#   Statsample::Database.insert(ds,dbh,"test")
#
def insert(ds, dbh, table)
  require 'dbi'
  placeholders = (["?"]*ds.fields.size).join(",")
  query = "INSERT INTO #{table} (#{ds.fields.join(",")}) VALUES (#{placeholders})"
  sth = dbh.prepare(query)
  ds.each_array { |row| sth.execute(*row) }
  true
end
# Create a CREATE TABLE statement based on a given Dataset.
#
# USE:
#
#   ds={'id'=>[1,2,3,4,5].to_vector,'name'=>%w{Alex Peter Susan Mary John}.to_vector}.to_dataset
#   Statsample::Database.create_sql(ds,'names')
#   ==>"CREATE TABLE names (id INTEGER,\n name VARCHAR (255)) CHARACTER SET=UTF8;"
#
def create_sql(ds,table,charset="UTF8")
  # One column definition per field, typed by the vector's db_type.
  columns = ds.fields.collect { |f| "#{f} #{ds[f].db_type}" }
  "CREATE TABLE #{table} (#{columns.join(",\n ")}) CHARACTER SET=#{charset};"
end
end
end
module Mondrian
class << self
# Writes the dataset as a tab-separated file for Mondrian: header of
# field names, then one line per case, with nils as "NA" and internal
# whitespace replaced by underscores.
def write(dataset,filename)
  File.open(filename,"wb") do |fp|
    fp.puts dataset.fields.join("\t")
    dataset.each_array_with_nils do |row|
      cleaned = row.collect { |v| v.nil? ? "NA" : v.to_s.gsub(/\s+/,"_") }
      fp.puts cleaned.join("\t")
    end
  end
end
end
class SpreadsheetBase
class << self
# Builds field names from a header row: cells are downcased; nil cells
# get generated names (var00001, var00002, ...); repeated names are
# disambiguated by Array#recode_repeated.
def extract_fields(row)
  generated = 0
  fields = row.to_a.collect do |cell|
    if cell.nil?
      generated += 1
      "var%05d" % generated
    else
      cell.to_s.downcase
    end
  end
  fields.recode_repeated
end
# Normalizes a data row: cells listed in +empty+ become nil; numeric
# strings become Integer (all digits) or Float (decimal comma
# accepted); everything else passes through unchanged.
def process_row(row,empty)
  row.to_a.map do |cell|
    if empty.include?(cell)
      nil
    elsif cell.is_a? String and cell.is_number?
      cell =~ /^\d+$/ ? cell.to_i : cell.gsub(",",".").to_f
    else
      cell
    end
  end
end
# Promotes each field's vector to :scale or :date when its contents
# allow it; otherwise the type is left untouched.
def convert_to_scale_and_date(ds,fields)
  fields.each do |field|
    vector = ds[field]
    if vector.can_be_scale?
      vector.type = :scale
    elsif vector.can_be_date?
      vector.type = :date
    end
  end
end
end
end
class PlainText < SpreadsheetBase
class << self
# Reads a whitespace-separated plain-text file into a Dataset with the
# given field names. Lines consisting only of a DOS EOF marker are
# skipped.
def read(filename, fields)
  ds=Statsample::Dataset.new(fields)
  # Block form guarantees the handle is closed; the previous code
  # opened the file and never closed it.
  File.open(filename,"r") do |fp|
    fp.each_line do |line|
      row=process_row(line.strip.split(/\s+/),[""])
      next if row==["\x1A"]  # DOS end-of-file character
      ds.add_case_array(row)
    end
  end
  convert_to_scale_and_date(ds,fields)
  ds.update_valid_data
  fields.each {|f|
    ds[f].name=f
  }
  ds
end
end
end
class Excel < SpreadsheetBase
class << self
# Write an Excel spreadsheet based on a dataset: bold blue header row
# of field names, then one row per case.
# * TODO: Format date values nicely
def write(dataset,filename)
  require 'spreadsheet'
  book = Spreadsheet::Workbook.new
  sheet = book.create_worksheet
  header_format = Spreadsheet::Format.new :color => :blue,
                                          :weight => :bold
  sheet.row(0).concat(dataset.fields.map {|i| i.dup}) # Unfreeze strings
  sheet.row(0).default_format = header_format
  row_index = 1
  dataset.each_array do |row|
    sheet.row(row_index).concat(row)
    row_index += 1
  end
  book.write(filename)
end
# This should be fixed.
# If we have a Formula, it should be resolved first.
# Mutates +row+ in place: formula cells are replaced by their cached
# value (nil on error cells); numeric cells whose index is in +dates+
# are converted to dates via Spreadsheet's Row#date.
def preprocess_row(row, dates)
i=-1
row.collect!{|c|
i+=1
if c.is_a? Spreadsheet::Formula
if(c.value.is_a? Spreadsheet::Excel::Error)
nil
else
c.value
end
elsif dates.include? i and !c.nil? and c.is_a? Numeric
# Numeric cell in a date-formatted column: decode as a date.
row.date(i)
else
c
end
}
end
private :process_row, :preprocess_row
# Returns a dataset based on a xls file
# USE:
#     ds = Statsample::Excel.read("test.xls")
# Options: :worksheet_id (default 0), :ignore_lines (default 0),
# :empty (cell values treated as nil, default ['']).
def read(filename, opts=Hash.new)
  require 'spreadsheet'
  raise "options should be Hash" unless opts.is_a? Hash
  opts_default={
    :worksheet_id=>0,
    :ignore_lines=>0,
    :empty=>['']
  }
  opts=opts_default.merge opts
  worksheet_id=opts[:worksheet_id]
  ignore_lines=opts[:ignore_lines]
  empty=opts[:empty]
  first_row=true
  fields=[]
  ds=nil
  line_number=0
  book = Spreadsheet.open filename
  sheet= book.worksheet worksheet_id
  sheet.each do |row|
    begin
      # Columns formatted DD/MM/YYYY will be decoded as dates.
      dates=[]
      row.formats.each_index{|i|
        if !row.formats[i].nil? and row.formats[i].number_format=="DD/MM/YYYY"
          dates.push(i)
        end
      }
      line_number+=1
      next if(line_number<=ignore_lines)
      preprocess_row(row,dates)
      if first_row
        fields=extract_fields(row)
        ds=Statsample::Dataset.new(fields)
        first_row=false
      else
        rowa=process_row(row,empty)
        # Pad short rows so every case has one entry per field.
        (fields.size - rowa.size).times {
          rowa << nil
        }
        ds.add_case(rowa,false)
      end
    rescue => e
      # Re-raise with file context. The previous code built this
      # message and then discarded it, re-raising without the line
      # number.
      raise e.exception("#{e.to_s}\nError on Line # #{line_number}:#{row.join(",")}")
    end
  end
  convert_to_scale_and_date(ds, fields)
  ds.update_valid_data
  fields.each {|f|
    ds[f].name=f
  }
  ds.name=filename
  ds
end
end
end
module Mx
class << self
# Writes the dataset as an Mx script: either raw rectangular data
# (+type+ :raw, invalid cells as ".") or a full covariance matrix
# (+type+ :covariance, the default).
def write(dataset,filename,type=:covariance)
  puts "Writing MX File"
  File.open(filename,"w") do |fp|
    # First comment line records the output file name. The previous
    # literal "! #(unknown)" looked like a lost interpolation.
    fp.puts "! #{filename}"
    fp.puts "! Output generated by Statsample"
    fp.puts "Data Ninput=#{dataset.fields.size} Nobservations=#{dataset.cases}"
    fp.puts "Labels "+dataset.fields.join(" ")
    case type
    when :raw
      fp.puts "Rectangular"
      dataset.each do |row|
        out=dataset.fields.collect do |f|
          if dataset[f].is_valid? row[f]
            row[f]
          else
            "."
          end
        end
        fp.puts out.join("\t")
      end
      fp.puts "End Rectangular"
    when :covariance
      fp.puts " CMatrix Full"
      cm=Statsample::Bivariate.covariance_matrix(dataset)
      d=(0...(cm.row_size)).collect {|row|
        (0...(cm.column_size)).collect{|col|
          cm[row,col].nil? ? "." : sprintf("%0.3f", cm[row,col])
        }.join(" ")
      }.join("\n")
      fp.puts d
    end
  end
end
end
module GGobi
class << self
# Writes the GGobi XML representation of +dataset+ to +filename+.
def write(dataset,filename,opt={})
  content = out(dataset, opt)
  File.open(filename,"w") { |fp| fp.write(content) }
end
# Builds the GGobi XML document for +dataset+ as a String.
# Options: :dataname, :description, :missing (text used for nil cells).
def out(dataset,opt={})
require 'ostruct'
default_opt = {:dataname => "Default", :description=>"", :missing=>"NA"}
default_opt.merge! opt
# carrier accumulates, across variable definitions, which fields are
# categorical and the value->level-index conversions for each.
carrier=OpenStruct.new
carrier.categorials=[]
carrier.conversions={}
variables_def=dataset.fields.collect{|k|
variable_definition(carrier,dataset[k],k)
}.join("\n")
# Map field position -> field name for the categorical fields only.
indexes=carrier.categorials.inject({}) {|s,c|
s[dataset.fields.index(c)]=c
s
}
records=""
dataset.each_array {|c|
# Replace categorical values by their numeric level index.
indexes.each{|ik,iv|
c[ik]=carrier.conversions[iv][c[ik]]
}
records << "<record>#{values_definition(c, default_opt[:missing])}</record>\n"
}
out=<<EOC
<?xml version="1.0"?>
<!DOCTYPE ggobidata SYSTEM "ggobi.dtd">
<ggobidata count="1">
<data name="#{default_opt[:dataname]}">
<description>#{default_opt[:description]}</description>
<variables count="#{dataset.fields.size}">
#{variables_def}
</variables>
<records count="#{dataset.cases}" missingValue="#{default_opt[:missing]}">
#{records}
</records>
</data>
</ggobidata>
EOC
out
end
# Renders one record's cells as a space-separated string: nil becomes
# the +missing+ marker, numbers are printed as-is, and strings get
# internal whitespace collapsed to underscores.
def values_definition(c,missing)
  rendered = c.collect do |value|
    if value.nil?
      "#{missing}"
    elsif value.is_a? Numeric
      "#{value}"
    else
      "#{value.gsub(/\s+/,"_")}"
    end
  end
  rendered.join(" ")
end
# Outputs a string for a variable definition
# v = vector
# name = name of the variable
# nickname = nickname
# Side effect: for categorical variables, registers the name on
# carrier.categorials and stores the factor->level-index map on
# carrier.conversions[name] (used later when rendering records).
def variable_definition(carrier,v,name,nickname=nil)
nickname = (nickname.nil? ? "" : "nickname=\"#{nickname}\"" )
# Nominal vectors, or any vector containing strings, are categorical.
if v.type==:nominal or v.data.find {|d| d.is_a? String }
carrier.categorials.push(name)
carrier.conversions[name]={}
factors=v.factors
out ="<categoricalvariable name=\"#{name}\" #{nickname}>\n"
out << "<levels count=\"#{factors.size}\">\n"
out << (1..factors.size).to_a.collect{|i|
# Levels are numbered from 1; remember the mapping for records.
carrier.conversions[name][factors[i-1]]=i
"<level value=\"#{i}\">#{v.labeling(factors[i-1])}</level>"
}.join("\n")
out << "</levels>\n</categoricalvariable>\n"
out
elsif v.data.find {|d| d.is_a? Float}
"<realvariable name=\"#{name}\" #{nickname} />"
else
"<integervariable name=\"#{name}\" #{nickname} />"
end
end
end
end
end
require 'statsample/converter/csv.rb'
================================================
FILE: lib/statsample/crosstab.rb
================================================
module Statsample
# Class to create crosstab of data
# With this, you can create reports and do chi square test
# The first vector will be at rows and the second will the the columns
#
class Crosstab
include Summarizable
attr_reader :v_rows, :v_cols
attr_accessor :row_label, :column_label, :name, :percentage_row, :percentage_column, :percentage_total
# Builds a crosstab from two vectors of equal size: +v1+ supplies the
# rows and +v2+ the columns. Missing data are removed pairwise.
# +opts+ may set any accessor (:name, :percentage_row, etc.).
def initialize(v1, v2, opts=Hash.new)
#raise ArgumentError, "Both arguments should be Vectors" unless v1.is_a? Statsample::Vector and v2.is_a? Statsample::Vector
raise ArgumentError, "Vectors should be the same size" unless v1.size==v2.size
@v_rows, @v_cols=Statsample.only_valid_clone(v1.to_vector,v2.to_vector)
@cases=@v_rows.size
@row_label=v1.name
@column_label=v2.name
@name=nil
# Percentage tables are opt-in; see report_building.
@percentage_row = @percentage_column = @percentage_total=false
opts.each{|k,v|
self.send("#{k}=",v) if self.respond_to? k
}
@name||=_("Crosstab %s - %s") % [@row_label, @column_label]
end
# Sorted factors of the row vector.
def rows_names
@v_rows.factors.sort
end
# Sorted factors of the column vector.
def cols_names
@v_cols.factors.sort
end
# Marginal frequencies of the row vector ({factor => count}).
def rows_total
@v_rows.frequencies
end
# Marginal frequencies of the column vector ({factor => count}).
def cols_total
@v_cols.frequencies
end
# Cell frequencies as a hash {[row_factor, col_factor] => count},
# including zero entries for combinations that never occur.
def frequencies
  base = {}
  rows_names.each do |row|
    cols_names.each { |col| base[[row, col]] = 0 }
  end
  base.update(Statsample::vector_cols_matrix(@v_rows,@v_cols).to_a.to_vector.frequencies)
end
# Cell frequencies as a Matrix (rows and columns in sorted factor order).
def to_matrix
  f = frequencies
  table = rows_names.collect do |row|
    cols_names.collect { |col| f[[row, col]] }
  end
  Matrix.rows(table)
end
# Frequencies nested by row: {row_factor => {col_factor => count}}.
def frequencies_by_row
  f = frequencies
  result = {}
  rows_names.each do |row|
    result[row] = cols_names.inject({}) { |acc, col| acc[col] = f[[row, col]]; acc }
  end
  result
end
# Frequencies nested by column: {col_factor => {row_factor => count}}.
def frequencies_by_col
  f = frequencies
  result = {}
  cols_names.each do |col|
    result[col] = rows_names.inject({}) { |acc, row| acc[row] = f[[row, col]]; acc }
  end
  result
end
# Chi square test, comparing the observed matrix with the expected one.
def chi_square
  require 'statsample/test'
  Statsample::Test.chi_square(to_matrix, matrix_expected)
end
# Matrix of expected cell counts under independence
# (row_total * col_total / n). Useful to obtain chi square.
def matrix_expected
  rt = rows_total
  ct = cols_total
  total = @v_rows.size
  expected = rows_names.collect do |row|
    cols_names.collect do |col|
      (rt[row] * ct[col]).quo(total)
    end
  end
  Matrix.rows(expected)
end
# Hash with every column factor mapped to 0 (accumulator template).
def cols_empty_hash
  result = {}
  cols_names.each { |col| result[col] = 0 }
  result
end
# Builds the crosstab report: chi square, raw counts table with
# marginal totals, and optional row/column/total percentage tables.
def report_building(builder)
builder.section(:name=>@name) do |generator|
fq=frequencies
rn=rows_names
cn=cols_names
total=0
total_cols=cols_empty_hash
generator.text "Chi Square: #{chi_square}"
generator.text(_("Rows: %s") % @row_label) unless @row_label.nil?
generator.text(_("Columns: %s") % @column_label) unless @column_label.nil?
t=ReportBuilder::Table.new(:name=>@name+" - "+_("Raw"), :header=>[""]+cols_names.collect {|c| @v_cols.labeling(c)}+[_("Total")])
rn.each do |row|
# One row per row factor: cell counts plus the row total, while
# accumulating grand and per-column totals for the footer.
total_row=0
t_row=[@v_rows.labeling(row)]
cn.each do |col|
data=fq[[row,col]]
total_row+=fq[[row,col]]
total+=fq[[row,col]]
total_cols[col]+=fq[[row,col]]
t_row.push(data)
end
t_row.push(total_row)
t.row(t_row)
end
t.hr
# Footer: column totals and the grand total.
t_row=[_("Total")]
cn.each do |v|
t_row.push(total_cols[v])
end
t_row.push(total)
t.row(t_row)
generator.parse_element(t)
if(@percentage_row)
table_percentage(generator,:row)
end
if(@percentage_column)
table_percentage(generator,:column)
end
if(@percentage_total)
table_percentage(generator,:total)
end
end
end
# Renders one percentage table; +type+ selects the denominator:
# :row (row totals), :column (column totals) or :total (all cases).
def table_percentage(generator,type)
fq=frequencies
cn=cols_names
rn=rows_names
rt=rows_total
ct=cols_total
type_name=case type
when :row then _("% Row")
when :column then _("% Column")
when :total then _("% Total")
end
t=ReportBuilder::Table.new(:name=>@name+" - "+_(type_name), :header=>[""]+cols_names.collect {|c| @v_cols.labeling(c) } + [_("Total")])
rn.each do |row|
t_row=[@v_rows.labeling(row)]
cn.each do |col|
# Denominator for each cell depends on the table type.
total=case type
when :row then rt[row]
when :column then ct[col]
when :total then @cases
end
data = sprintf("%0.2f%%", fq[[row,col]]*100.0/ total )
t_row.push(data)
end
# Row-margin cell: the row total as a percentage of its denominator.
total=case type
when :row then rt[row]
when :column then @cases
when :total then @cases
end
t_row.push(sprintf("%0.2f%%", rt[row]*100.0/total))
t.row(t_row)
end
t.hr
# Footer: column margins, then the fixed 100% corner cell.
t_row=[_("Total")]
cn.each{|col|
total=case type
when :row then @cases
when :column then ct[col]
when :total then @cases
end
t_row.push(sprintf("%0.2f%%", ct[col]*100.0/total))
}
t_row.push("100%")
t.row(t_row)
generator.parse_element(t)
end
end
end
================================================
FILE: lib/statsample/dataset.rb
================================================
require 'statsample/vector'
class Hash
# Creates a Statsample::Dataset from this hash of {field => Vector};
# extra arguments are forwarded to Dataset.new (e.g. the field order).
def to_dataset(*args)
Statsample::Dataset.new(self, *args)
end
end
class Array
  # New array with +s+ prepended to each element's string form.
  def prefix(s) # :nodoc:
    collect { |item| s + item.to_s }
  end

  # New array with +s+ appended to each element's string form.
  def suffix(s) # :nodoc:
    collect { |item| item.to_s + s }
  end
end
module Statsample
# Wraps an exception raised while iterating a Dataset, adding the
# dataset and, when available, the offending row to the message.
class DatasetException < RuntimeError # :nodoc:
# The dataset being iterated and the original exception.
attr_reader :ds,:exp
def initialize(ds,e)
@ds=ds
@exp=e
end
# Original message and backtrace, plus the current row when the
# dataset's iteration pointer is set.
def to_s
m="Error on iteration: "+@exp.message+"\n"+@exp.backtrace.join("\n")
m+="\nRow ##{@ds.i}:#{@ds.case_as_hash(@ds.i)}" unless @ds.i.nil?
m
end
end
# Set of cases with values for one or more variables,
# analog to a dataframe on R or a standard data file of SPSS.
# Every vector has a <tt>#field</tt> name, which identifies it. By default,
# the vectors are ordered by field name, but you can change
# the field order manually.
# The Dataset work as a Hash, with keys are field names
# and values are Statsample::Vector
#
#
# ==Usage
# Create a empty dataset:
# Dataset.new()
# Create a dataset with three empty vectors, called <tt>v1</tt>, <tt>v2</tt> and <tt>v3</tt>:
# Dataset.new(%w{v1 v2 v3})
# Create a dataset with two vectors, called <tt>v1</tt>
# and <tt>v2</tt>:
# Dataset.new({'v1'=>%w{1 2 3}.to_vector, 'v2'=>%w{4 5 6}.to_vector})
# Create a dataset with two given vectors (v1 and v2),
# with vectors on inverted order:
# Dataset.new({'v2'=>v2,'v1'=>v1},['v2','v1'])
#
# The fast way to create a dataset uses Hash#to_dataset, with
# field order as arguments
# v1 = [1,2,3].to_scale
# v2 = [1,2,3].to_scale
# ds = {'v1'=>v2, 'v2'=>v2}.to_dataset(%w{v2 v1})
class Dataset
include Writable
include Summarizable
# Hash of Statsample::Vector
attr_reader :vectors
# Ordered ids of vectors
attr_reader :fields
# Name of dataset
attr_accessor :name
# Number of cases
attr_reader :cases
# Location of pointer on enumerations methods (like #each)
attr_reader :i
# Generates a new dataset, using three vectors
# - Rows
# - Columns
# - Values
#
# For example, you have these values
#
# x y v
# a a 0
# a b 1
# b a 1
# b b 0
#
# You obtain
# id a b
# a 0 1
# b 1 0
#
# Useful to process outputs from databases
def self.crosstab_by_asignation(rows,columns,values)
raise "Three vectors should be equal size" if rows.size!=columns.size or rows.size!=values.size
# Distinct column values become the output fields.
cols_values=columns.factors
cols_n=cols_values.size
# h_rows[row_value][col_value] starts as nil for every combination.
h_rows=rows.factors.inject({}){|a,v| a[v]=cols_values.inject({}){
|a1,v1| a1[v1]=nil; a1
}
;a}
# Fill the lookup with the observed values.
values.each_index{|i|
h_rows[rows[i]][columns[i]]=values[i]
}
# "_id" holds the row value; one extra field per distinct column value.
ds=Dataset.new(["_id"]+cols_values)
cols_values.each{|c|
ds[c].type=values.type
}
rows.factors.each {|row|
n_row=Array.new(cols_n+1)
n_row[0]=row
cols_values.each_index {|i|
n_row[i+1]=h_rows[row][cols_values[i]]
}
ds.add_case_array(n_row)
}
ds.update_valid_data
ds
end
# Return true if any vector has missing data
# True when at least one vector in the dataset contains missing values.
def has_missing_data?
  @vectors.values.any? { |vector| vector.has_missing_data? }
end
# Return a nested hash using fields as keys and
# an array constructed of hashes with other values.
# If block provided, is used to provide the
# values, with parameters +row+ of dataset,
# +current+ last hash on hierarchy and
# +name+ of the key to include
def nest(*tree_keys,&block)
# Accept either a list of keys or a single array of keys.
tree_keys=tree_keys[0] if tree_keys[0].is_a? Array
out=Hash.new
each do |row|
current=out
# Create tree: walk/create one nested hash level per key except the last.
tree_keys[0,tree_keys.size-1].each do |f|
root=row[f]
current[root]||=Hash.new
current=current[root]
end
name=row[tree_keys.last]
if !block
# Default leaf: array of the row's remaining (non-key) values.
current[name]||=Array.new
current[name].push(row.delete_if{|key,value| tree_keys.include? key})
else
# Custom leaf: the block supplies the value.
current[name]=block.call(row, current,name)
end
end
out
end
# Creates a new dataset. A dataset is a set of ordered named vectors
# of the same size.
#
# [vectors] With an array, creates a set of empty vectors named as
# values on the array. With a hash, each Vector is assigned as
# a variable of the Dataset named as its key
# [fields] Array of names for vectors. Only used to set the
# order of variables. If empty, vector keys in alphabetic order
# are used as fields.
def initialize(vectors={}, fields=[])
# Class-wide counter used to build default dataset names.
@@n_dataset||=0
@@n_dataset+=1
@name=_("Dataset %d") % @@n_dataset
@cases=0
@gsl=nil
@i=nil
if vectors.instance_of? Array
# An array of names creates that many empty vectors.
@fields=vectors.dup
@vectors=vectors.inject({}){|a,x| a[x]=Statsample::Vector.new(); a}
else
# Check vectors: field/key congruence and equal vector sizes.
@vectors=vectors
@fields=fields
check_order
check_length
end
end
#
# Creates a copy of the given dataset, deleting all the cases with
# missing data on one of the vectors.
#
# @param array of fields to include. No value include all fields
#
def dup_only_valid(*fields_to_include)
if fields_to_include.size==1 and fields_to_include[0].is_a? Array
fields_to_include=fields_to_include[0]
end
fields_to_include=@fields if fields_to_include.size==0
if fields_to_include.any? {|f| @vectors[f].has_missing_data?}
# Rebuild case by case, skipping any row with a missing value on one
# of the selected vectors.
ds=Dataset.new(fields_to_include)
fields_to_include.each {|f| ds[f].type=@vectors[f].type}
each {|row|
unless fields_to_include.any? {|f| @vectors[f].has_missing_data? and !@vectors[f].is_valid? row[f]}
row_2=fields_to_include.inject({}) {|ac,v| ac[v]=row[v]; ac}
ds.add_case(row_2)
end
}
else
# No missing data anywhere: a plain deep copy is enough.
ds=dup fields_to_include
end
ds.name= self.name
ds
end
#
# Returns a duplicate of the Dataset.
# All vectors are copied, so any modification on new
# dataset doesn't affect original dataset's vectors.
# If fields given as parameter, only include those vectors.
#
# @param array of fields to include. No value include all fields
# @return {Statsample::Dataset}
def dup(*fields_to_include)
  # Accept either a list of names or a single array of names.
  if fields_to_include.size == 1 and fields_to_include[0].is_a? Array
    fields_to_include = fields_to_include[0]
  end
  fields_to_include = @fields if fields_to_include.empty?
  vectors = {}
  fields = []
  fields_to_include.each do |field|
    raise "Vector #{field} doesn't exists" unless @vectors.has_key? field
    # Each vector is duplicated, so the copy owns its data.
    vectors[field] = @vectors[field].dup
    fields << field
  end
  copy = Dataset.new(vectors, fields)
  copy.name = self.name
  copy
end
# Returns an array with the fields from the first argument to the last argument
def from_to(from, to)
  # Both endpoints must be known fields.
  raise ArgumentError, "Field #{from} should be on dataset" unless @fields.include? from
  raise ArgumentError, "Field #{to} should be on dataset" unless @fields.include? to
  @fields[@fields.index(from)..@fields.index(to)]
end
# Returns (when possible) a cheap copy of dataset.
# If no vector have missing values, returns original vectors.
# If missing values presents, uses Dataset.dup_only_valid.
#
# @param array of fields to include. No value include all fields
# @return {Statsample::Dataset}
def clone_only_valid(*fields_to_include)
  if fields_to_include.size == 1 and fields_to_include[0].is_a? Array
    fields_to_include = fields_to_include[0]
  end
  fields_to_include = @fields.dup if fields_to_include.empty?
  # Only pay for a deep copy when some selected vector has missing data;
  # otherwise share the original vectors.
  missing = fields_to_include.any? { |v| @vectors[v].has_missing_data? }
  missing ? dup_only_valid(fields_to_include) : clone(fields_to_include)
end
# Returns a shallow copy of Dataset.
# Object id will be distinct, but @vectors will be the same.
# @param array of fields to include. No value include all fields
# @return {Statsample::Dataset}
def clone(*fields_to_include)
  if fields_to_include.size == 1 and fields_to_include[0].is_a? Array
    fields_to_include = fields_to_include[0]
  end
  fields_to_include = @fields.dup if fields_to_include.empty?
  copy = Dataset.new
  fields_to_include.each do |field|
    raise "Vector #{field} doesn't exists" unless @vectors.has_key? field
    # Shares the very same vector objects with the original dataset.
    copy[field] = @vectors[field]
  end
  copy.fields = fields_to_include
  copy.name = @name
  copy.update_valid_data
  copy
end
# Creates a copy of the given dataset, without data on vectors
#
# @return {Statsample::Dataset}
def dup_empty
  # Same structure (fields and vector types), zero cases.
  empty_vectors = {}
  @vectors.each { |key, vector| empty_vectors[key] = vector.dup_empty }
  Dataset.new(empty_vectors, @fields.dup)
end
# Merge vectors from two datasets.
# In case of name collision, the vector names are changed to
# x_1, x_2, ...
#
# @return {Statsample::Dataset}
def merge(other_ds)
raise "Cases should be equal (this:#{@cases}; other:#{other_ds.cases}" unless @cases==other_ds.cases
# Keep each vector's type; rename repeated field names (x -> x_1, x_2...).
types = @fields.collect{|f| @vectors[f].type} + other_ds.fields.collect{|f| other_ds[f].type}
new_fields = (@fields+other_ds.fields).recode_repeated
ds_new=Statsample::Dataset.new(new_fields)
new_fields.each_index{|i|
field=new_fields[i]
ds_new[field].type=types[i]
}
# Concatenate both datasets' rows side by side, case by case.
@cases.times {|i|
row=case_as_array(i)+other_ds.case_as_array(i)
ds_new.add_case_array(row)
}
ds_new.update_valid_data
ds_new
end
# Join 2 Datasets by given fields
# type is one of :left and :inner, default is :left
#
# @return {Statsample::Dataset}
def join(other_ds,fields_1=[],fields_2=[],type=:left)
# Fields brought in from the other dataset (its join keys excluded).
fields_new = other_ds.fields - fields_2
fields = self.fields + fields_new
# Index the other dataset's rows by their join-key values.
other_ds_hash = {}
other_ds.each do |row|
key = row.select{|k,v| fields_2.include?(k)}.values
value = row.select{|k,v| fields_new.include?(k)}
if other_ds_hash[key].nil?
other_ds_hash[key] = [value]
else
other_ds_hash[key] << value
end
end
new_ds = Dataset.new(fields)
self.each do |row|
key = row.select{|k,v| fields_1.include?(k)}.values
new_case = row.dup
if other_ds_hash[key].nil?
# No match: a left join keeps the row with nils; an inner join drops it.
if type == :left
fields_new.each{|field| new_case[field] = nil}
new_ds.add_case(new_case)
end
else
# One output case per matching row of the other dataset.
other_ds_hash[key].each do |new_values|
new_ds.add_case new_case.merge(new_values)
end
end
end
new_ds
end
# Returns a dataset with standarized data.
#
# @return {Statsample::Dataset}
def standarize
  # Standardize every vector (see Vector#vector_standarized) on a deep copy.
  out = dup()
  out.fields.each { |field| out[field] = out[field].vector_standarized }
  out
end
# Generate a matrix, based on fields of dataset
#
# @return {::Matrix}
def collect_matrix
  # The block supplies the value for every (row-field, col-field) pair.
  Matrix.rows(@fields.map { |r| @fields.map { |c| yield r, c } })
end
# We have the same datasets if +vectors+ and +fields+ are the same
#
# @return {Boolean}
def ==(d2)
  # Equal vectors hash and equal field ordering.
  @vectors == d2.vectors && @fields == d2.fields
end
# Returns vector <tt>c</tt>
#
# @return {Statsample::Vector}
def col(c)
@vectors[c]
end
# #vector is a synonym for #col.
alias_method :vector, :col
# Equal to Dataset[<tt>name</tt>]=<tt>vector</tt>
#
# @return self
def add_vector(name, vector)
  # The new vector must have exactly as many cases as the dataset.
  unless vector.size == @cases
    raise ArgumentError, "Vector have different size"
  end
  @vectors[name] = vector
  check_order
  self
end
# Returns true if dataset have vector <tt>v</tt>.
#
# @return {Boolean}
def has_vector?(v)
  @vectors.has_key?(v)
end
# Creates a dataset with the random data, of a n size
# If n not given, uses original number of cases.
#
# @return {Statsample::Dataset}
# Creates a dataset of +n+ cases (default: the original number of
# cases), each drawn at random with replacement from this dataset.
#
# @return {Statsample::Dataset}
def bootstrap(n=nil)
  n ||= @cases
  ds_boot = dup_empty
  n.times do
    # Sample an index over the *whole* dataset. The previous code used
    # rand(n): with n < @cases it never sampled the tail cases, and with
    # n > @cases it indexed out of range.
    ds_boot.add_case_array(case_as_array(rand(@cases)))
  end
  ds_boot.update_valid_data
  ds_boot
end
# Fast version of #add_case.
# Can only add one case and no error check if performed
# You SHOULD use #update_valid_data at the end of insertion cycle
#
#
def add_case_array(v)
  # Unchecked raw append; caller must run #update_valid_data afterwards.
  v.each_index do |idx|
    @vectors[@fields[idx]].data.push(v[idx])
  end
end
# Insert a case, using:
# * Array: size equal to number of vectors and values in the same order as fields
# * Hash: keys equal to fields
# If uvd is false, #update_valid_data is not executed after
# inserting a case. This is very useful if you want to increase the
# performance on inserting many cases, because #update_valid_data
# performs check on vectors and on the dataset
def add_case(v,uvd=true)
case v
when Array
if (v[0].is_a? Array)
# An array of arrays inserts several cases; validity data is only
# refreshed once, below.
v.each{|subv| add_case(subv,false)}
else
raise ArgumentError, "Input array size (#{v.size}) should be equal to fields number (#{@fields.size})" if @fields.size!=v.size
v.each_index {|i| @vectors[@fields[i]].add(v[i],false)}
end
when Hash
raise ArgumentError, "Hash keys should be equal to fields #{(v.keys - @fields).join(",")}" if @fields.sort!=v.keys.sort
@fields.each{|f| @vectors[f].add(v[f],false)}
else
raise TypeError, 'Value must be a Array or a Hash'
end
if uvd
update_valid_data
end
end
# Check vectors and fields after inserting data. Use only
# after #add_case_array or #add_case with second parameter to false
def update_valid_data
  # Any cached GSL matrix is stale after raw insertions.
  @gsl = nil
  @fields.each { |field| @vectors[field].set_valid_data }
  check_length
end
# Delete vector named +name+. Multiple fields accepted.
def delete_vector(*args)
  # Accept several names or one array of names.
  names = (args.size == 1 && args[0].is_a?(Array)) ? args[0] : args
  names.each do |name|
    @fields.delete(name)
    @vectors.delete(name)
  end
end
# Splits vector +name_+ by +sep+ and adds one vector per distinct value,
# named "<name_><join><i>" with a running index i; each new vector's
# +name+ records the original value ("<name_>:<value>").
def add_vectors_by_split_recode(name_,join='-',sep=Statsample::SPLIT_TOKEN)
split=@vectors[name_].split_by_separator(sep)
i=1
split.each{|k,v|
new_field=name_+join+i.to_s
v.name=name_+":"+k
add_vector(new_field,v)
i+=1
}
end
# Splits vector +name+ by +sep+ and adds one vector per distinct value,
# named "<name><join><value>".
def add_vectors_by_split(name, join='-', sep=Statsample::SPLIT_TOKEN)
  @vectors[name].split_by_separator(sep).each do |value, vector|
    add_vector(name + join + value, vector)
  end
end
# Builds a vector of the given +type+ from the block's value for each row.
def vector_by_calculation(type=:scale)
  results = []
  each { |row| results << yield(row) }
  results.to_vector(type)
end
# Returns a vector with sumatory of fields
# if fields parameter is empty, sum all fields
def vector_sum(fields=nil)
fields||=@fields
vector=collect_with_index do |row, i|
# A case with any missing value on the selected fields sums to nil.
if(fields.find{|f| !@vectors[f].data_with_nils[i]})
nil
else
fields.inject(0) {|ac,v| ac + row[v].to_f}
end
end
vector.name=_("Sum from %s") % @name
vector
end
# Check if #fields attribute is correct, after inserting or deleting vectors
def check_fields(fields)
  fields ||= @fields
  # Reject any name not present on the dataset.
  unknown = fields - @fields
  raise "Fields #{unknown.join(", ")} doesn't exists on dataset" if unknown.size > 0
  fields
end
# Returns a vector with the numbers of missing values for a case
def vector_missing_values(fields=nil)
  fields = check_fields(fields)
  # For each case, how many of the selected fields are nil.
  collect_with_index do |row, i|
    fields.count { |f| @vectors[f].data_with_nils[i].nil? }
  end
end
def vector_count_characters(fields=nil)
  fields = check_fields(fields)
  # Total characters across the selected fields; missing values count 0.
  collect_with_index do |row, i|
    fields.inject(0) do |acc, f|
      @vectors[f].data_with_nils[i].nil? ? acc : acc + row[f].to_s.size
    end
  end
end
# Returns a vector with the mean for a set of fields
# if fields parameter is empty, return the mean for all fields
# if max invalid parameter > 0, returns the mean for all tuples
# with 0 to max_invalid invalid fields
def vector_mean(fields=nil, max_invalid=0)
a=[]
fields=check_fields(fields)
size=fields.size
each_with_index do |row, i |
# Count missing values for this case while accumulating the sum.
sum=0
invalids=0
fields.each{|f|
if !@vectors[f].data_with_nils[i].nil?
sum+=row[f].to_f
else
invalids+=1
end
}
if(invalids>max_invalid)
# Too many missing fields: the mean for this case is nil.
a.push(nil)
else
# Mean over the valid fields only.
a.push(sum.quo(size-invalids))
end
end
a=a.to_vector(:scale)
a.name=_("Means from %s") % @name
a
end
# Check vectors for type and size.
def check_length # :nodoc:
size=nil
@vectors.each do |k,v|
raise Exception, "Data #{v.class} is not a vector on key #{k}" if !v.is_a? Statsample::Vector
if size.nil?
size=v.size
else
# Every vector must have exactly the same number of elements.
if v.size!=size
raise Exception, "Vector #{k} have size #{v.size} and dataset have size #{size}"
end
end
end
# Cache the common vector size as the dataset's case count.
@cases=size
end
# Retrieves each vector as [key, vector]
def each_vector # :yield: |key, vector|
  # Pairs are yielded in #fields order.
  @fields.each { |key| yield key, @vectors[key] }
end
# Use the C extension's implementation when available; otherwise fall
# back to the pure-Ruby version (#_case_as_hash).
if Statsample::STATSAMPLE__.respond_to?(:case_as_hash)
def case_as_hash(c) # :nodoc:
Statsample::STATSAMPLE__.case_as_hash(self,c)
end
else
# Retrieves case i as a hash
def case_as_hash(i)
_case_as_hash(i)
end
end
# Use the C extension's implementation when available; otherwise fall
# back to the pure-Ruby version (#_case_as_array).
if Statsample::STATSAMPLE__.respond_to?(:case_as_array)
def case_as_array(c) # :nodoc:
Statsample::STATSAMPLE__.case_as_array(self,c)
end
else
# Retrieves case i as a array, ordered on #fields order
def case_as_array(i)
_case_as_array(i)
end
end
# Pure-Ruby implementation backing #case_as_hash.
def _case_as_hash(c) # :nodoc:
  result = {}
  @fields.each { |f| result[f] = @vectors[f][c] }
  result
end
# Pure-Ruby implementation backing #case_as_array.
def _case_as_array(c) # :nodoc:
  @fields.map { |f| @vectors[f][c] }
end
# Returns each case as a hash
def each
begin
# @i tracks the current row so failures can report their position.
@i=0
@cases.times {|i|
@i=i
row=case_as_hash(i)
yield row
}
@i=nil
rescue =>e
# Wrap the error with row context (see DatasetException#to_s).
raise DatasetException.new(self, e)
end
end
# Returns each case as hash and index
def each_with_index # :yield: |case, i|
begin
# @i tracks the current row so failures can report their position.
@i=0
@cases.times{|i|
@i=i
row=case_as_hash(i)
yield row, i
}
@i=nil
rescue =>e
# Wrap the error with row context (see DatasetException#to_s).
raise DatasetException.new(self, e)
end
end
# Returns each case as an array, coding missing values as nils
def each_array_with_nils
  width = fields.size
  @cases.times do |i|
    @i = i
    row = Array.new(width)
    # Values come from data_with_nils, so missing data appears as nil.
    fields.each_with_index do |f, j|
      row[j] = @vectors[f].data_with_nils[i]
    end
    yield row
  end
  @i = nil
end
# Returns each case as an array
def each_array
  @cases.times do |i|
    @i = i
    yield case_as_array(i)
  end
  @i = nil
end
# Set fields order. If you omit one or more vectors, they are
# ordered by alphabetic order.
def fields=(f)
  @fields = f
  # Any omitted vectors are re-appended in alphabetic order.
  check_order
end
# Check congruence between +fields+ attribute
# and keys on +vectors
def check_order #:nodoc:
# When +fields+ and vector keys disagree, keep the known ordering and
# append any unlisted vector keys in alphabetic order.
if(@vectors.keys.sort!=@fields.sort)
@fields=@fields&@vectors.keys
@fields+=@vectors.keys.sort-@fields
end
end
# Returns the vector named i
def[](i)
if i.is_a? Range
# A range of field names returns a shallow sub-dataset (shared vectors).
fields=from_to(i.begin,i.end)
clone(*fields)
elsif i.is_a? Array
# An array of field names also returns a shallow sub-dataset.
clone(i)
else
raise Exception,"Vector '#{i}' doesn't exists on dataset" unless @vectors.has_key?(i)
@vectors[i]
end
end
# Retrieves a Statsample::Vector, based on the result
# of calculation performed on each case.
def collect(type=:scale)
  data = []
  each { |row| data << yield(row) }
  Statsample::Vector.new(data, type)
end
# Same as Statsample::Vector.collect, but giving case index as second parameter on yield.
def collect_with_index(type=:scale)
  data = []
  each_with_index { |row, i| data << yield(row, i) }
  Statsample::Vector.new(data, type)
end
# Recode a vector based on a block
def recode!(vector_name)
# Overwrite each element with the block's value for that case.
0.upto(@cases-1) {|i|
@vectors[vector_name].data[i]=yield case_as_hash(i)
}
# Re-validate the modified vector.
@vectors[vector_name].set_valid_data
end
# Builds a Statsample::Crosstab between vectors +v1+ and +v2+.
def crosstab(v1, v2, opts={})
  Statsample::Crosstab.new(@vectors[v1], @vectors[v2], opts)
end
def []=(i, v)
  # Only Statsample::Vector instances may be assigned to a field.
  unless v.instance_of? Statsample::Vector
    raise ArgumentError, "Should pass a Statsample::Vector"
  end
  @vectors[i] = v
  check_order
end
# Return data as a matrix. Column are ordered by #fields and
# rows by orden of insertion
def to_matrix
  rows = []
  each_array { |row| rows << row }
  Matrix.rows(rows)
end
# GSL support: lazily build and cache a GSL::Matrix copy of the data.
if Statsample.has_gsl?
# Drops the cached GSL matrix; it is rebuilt on the next #to_gsl.
def clear_gsl
@gsl=nil
end
# Returns the dataset as a GSL::Matrix (cases x fields), cached in @gsl.
def to_gsl
if @gsl.nil?
if cases.nil?
update_valid_data
end
@gsl=GSL::Matrix.alloc(cases,fields.size)
self.each_array{|c|
@gsl.set_row(@i,c)
}
end
@gsl
end
end
# Return a correlation matrix for fields included as parameters.
# By default, uses all fields of dataset
def correlation_matrix(fields=nil)
  target = fields ? clone(fields) : self
  Statsample::Bivariate.correlation_matrix(target)
end
# Return a correlation matrix for fields included as parameters.
# By default, uses all fields of dataset
def covariance_matrix(fields=nil)
  target = fields ? clone(fields) : self
  Statsample::Bivariate.covariance_matrix(target)
end
# Create a new dataset with all cases which the block returns true
def filter
  filtered = self.dup_empty
  # Keep only the cases for which the block is truthy.
  each { |row| filtered.add_case(row, false) if yield row }
  filtered.update_valid_data
  filtered.name = _("%s(filtered)") % @name
  filtered
end
# creates a new vector with the data of a given field which the block returns true
def filter_field(field)
  # Values of +field+ for the matching cases; keeps the source type.
  values = []
  each { |row| values << row[field] if yield row }
  values.to_vector(@vectors[field].type)
end
# Creates a Statsample::Multiset, using one or more fields
# to split the dataset.
def to_multiset_by_split(*fields)
  require 'statsample/multiset'
  # Single field and multi-field splits use different implementations.
  if fields.size == 1
    to_multiset_by_split_one_field(fields[0])
  else
    to_multiset_by_split_multiple_fields(*fields)
  end
end
# Creates a Statsample::Multiset, using one field
def to_multiset_by_split_one_field(field)
raise ArgumentError,"Should use a correct field name" if !@fields.include? field
# One sub-dataset per distinct value of +field+.
factors=@vectors[field].factors
ms=Multiset.new_empty_vectors(@fields, factors)
each {|c|
ms[c[field]].add_case(c,false)
}
# Finalize each sub-dataset and copy vector metadata from the source.
ms.datasets.each {|k,ds|
ds.update_valid_data
ds.name=@vectors[field].labeling(k)
ds.vectors.each{|k1,v1|
v1.type=@vectors[k1].type
v1.name=@vectors[k1].name
v1.labels=@vectors[k1].labels
}
}
ms
end
# Creates a Statsample::Multiset splitting on the combinations of
# values of several fields.
def to_multiset_by_split_multiple_fields(*fields)
  # Cartesian product of the split fields' factor values.
  factors_total = nil
  fields.each do |f|
    if factors_total.nil?
      factors_total = @vectors[f].factors.collect { |c| [c] }
    else
      suma = []
      factors = @vectors[f].factors
      factors_total.each { |f1| factors.each { |f2| suma.push(f1 + [f2]) } }
      factors_total = suma
    end
  end
  ms = Multiset.new_empty_vectors(@fields, factors_total)
  # Route each case to the sub-dataset keyed by its split-field values.
  # (Replaces a Proc built via string eval: constructing code with eval
  # was needless and unsafe; this block is behaviorally identical.)
  each { |c| ms[fields.collect { |f| c[f] }].add_case(c, false) }
  ms.datasets.each do |k, ds|
    ds.update_valid_data
    # Sub-dataset name: joined labels of its key values.
    ds.name = fields.size.times.map { |i|
      @vectors[fields[i]].labeling(k[i])
    }.join("-")
    # Copy vector metadata from the source dataset.
    ds.vectors.each { |k1, v1|
      v1.type   = @vectors[k1].type
      v1.name   = @vectors[k1].name
      v1.labels = @vectors[k1].labels
    }
  end
  ms
end
# Returns a vector, based on a string with a calculation based
# on vector
# The calculation will be eval'ed, so you can put any variable
# or expression valid on ruby
# For example:
# a=[1,2].to_vector(scale)
# b=[3,4].to_vector(scale)
# ds={'a'=>a,'b'=>b}.to_dataset
# ds.compute("a+b")
# => Vector [4,6]
# Returns a vector computed by eval'ing +text+ once per case, with
# field names rewritten to the row's values (scale vectors are coerced
# with to_f). Any case with a missing value on some field yields nil.
def compute(text)
  # Work on a copy so the caller's string is not mutated by gsub!.
  expr = text.dup
  @fields.each do |f|
    # BUG FIX: the original used `=` (assignment) instead of `==` here,
    # which silently forced every vector's type to :scale and made the
    # branch condition always true.
    if @vectors[f].type == :scale
      expr.gsub!(f, "row['#{f}'].to_f")
    else
      expr.gsub!(f, "row['#{f}']")
    end
  end
  collect_with_index do |row, i|
    missing = @fields.any? { |f| @vectors[f].data_with_nils[i].nil? }
    missing ? nil : eval(expr)
  end
end
# Test each row with one or more tests
# each test is a Proc with the form
# Proc.new {|row| row['age']>0}
# The function returns an array with all errors
def verify(*tests)
# An optional leading string names the id field used in reports.
if(tests[0].is_a? String)
id=tests[0]
tests.shift
else
id=@fields[0]
end
vr=[]
i=0
each do |row|
i+=1
# NOTE(review): despite the class comment describing tests as Procs,
# each test is indexed as a 3-element array here:
# [description, fields_of_interest, proc] — confirm against callers.
tests.each{|test|
if ! test[2].call(row)
values=""
if test[1].size>0
values=" ("+test[1].collect{|k| "#{k}=#{row[k]}"}.join(", ")+")"
end
vr.push("#{i} [#{row[id]}]: #{test[0]}#{values}")
end
}
end
vr
end
# Compact inspect-style description of the dataset.
def to_s
  # Closing '>' added: the original string left the leading '#<' unbalanced.
  "#<"+self.class.to_s+":"+self.object_id.to_s+" @name=#{@name} @fields=["+@fields.join(",")+"] cases="+@vectors[@fields[0]].size.to_s+">"
end
# Uses the same representation as #to_s.
def inspect
  to_s
end
# Creates a new dataset for one to many relations
# on a dataset, based on pattern of field names.
#
# for example, you have a survey for number of children
# with this structure:
# id, name, child_name_1, child_age_1, child_name_2, child_age_2
# with
# ds.one_to_many(%w{id}, "child_%v_%n"
# the field of first parameters will be copied verbatim
# to new dataset, and fields which responds to second
# pattern will be added one case for each different %n.
# For example
# cases=[
# ['1','george','red',10,'blue',20,nil,nil],
# ['2','fred','green',15,'orange',30,'white',20],
# ['3','alfred',nil,nil,nil,nil,nil,nil]
# ]
# ds=Statsample::Dataset.new(%w{id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3})
# cases.each {|c| ds.add_case_array c }
# ds.one_to_many(['id'],'car_%v%n').to_matrix
# => Matrix[
# ["red", "1", 10],
# ["blue", "1", 20],
# ["green", "2", 15],
# ["orange", "2", 30],
# ["white", "2", 20]
# ]
#
def one_to_many(parent_fields, pattern)
# Regexp matching child fields: %v captures the variable name,
# %n captures the repetition index.
re=Regexp.new pattern.gsub("%v","(.+?)").gsub("%n","(\\d+?)")
ds_vars=parent_fields
vars=[]
max_n=0
h=parent_fields.inject({}) {|a,v| a[v]=Statsample::Vector.new([], @vectors[v].type);a }
# Adding _col_id: records which repetition a generated case came from.
h['_col_id']=[].to_scale
ds_vars.push("_col_id")
# Collect the child variable names and the largest index seen.
@fields.each do |f|
if f=~re
if !vars.include? $1
vars.push($1)
h[$1]=Statsample::Vector.new([], @vectors[f].type)
end
max_n=$2.to_i if max_n < $2.to_i
end
end
ds=Dataset.new(h,ds_vars+vars)
each do |row|
row_out={}
parent_fields.each do |f|
row_out[f]=row[f]
end
# Emit one case per index n that has at least one non-nil child value.
max_n.times do |n1|
n=n1+1
any_data=false
vars.each do |v|
data=row[pattern.gsub("%v",v.to_s).gsub("%n",n.to_s)]
row_out[v]=data
any_data=true if !data.nil?
end
if any_data
row_out["_col_id"]=n
ds.add_case(row_out,false)
end
end
end
ds.update_valid_data
ds
end
# Writes a summary section: dataset name, case count and every vector.
def report_building(b)
  b.section(:name=>@name) do |g|
    # Translate the template first, then interpolate. The original
    # `_"Cases: %d" % cases` formatted the string before translating,
    # so the gettext lookup received "Cases: 42" and never matched.
    g.text(_("Cases: %d") % cases)
    @fields.each do |f|
      g.text "Element:[#{f}]"
      g.parse_element(@vectors[f])
    end
  end
end
end
end
================================================
FILE: lib/statsample/dominanceanalysis/bootstrap.rb
================================================
module Statsample
class DominanceAnalysis
# == Goal
# Generates Bootstrap sample to identity the replicability of a Dominance Analysis. See Azen & Bodescu (2003) for more information.
#
# == Usage
#
# require 'statsample'
# a=100.times.collect {rand}.to_scale
# b=100.times.collect {rand}.to_scale
# c=100.times.collect {rand}.to_scale
# d=100.times.collect {rand}.to_scale
# ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
# ds['y']=ds.collect{|row| row['a']*5+row['b']*2+row['c']*2+row['d']*2+10*rand()}
# dab=Statsample::DominanceAnalysis::Bootstrap.new(ds2, 'y', :debug=>true)
# dab.bootstrap(100,nil)
# puts dab.summary
# <strong>Output</strong>
# Sample size: 100
# t: 1.98421693632958
#
# Linear Regression Engine: Statsample::Regression::Multiple::MatrixEngine
# Table: Bootstrap report
# --------------------------------------------------------------------------------------------
# | pairs | sD | Dij | SE(Dij) | Pij | Pji | Pno | Reproducibility |
# --------------------------------------------------------------------------------------------
# | Complete dominance |
# --------------------------------------------------------------------------------------------
# | a - b | 1.0 | 0.6150 | 0.454 | 0.550 | 0.320 | 0.130 | 0.550 |
# | a - c | 1.0 | 0.9550 | 0.175 | 0.930 | 0.020 | 0.050 | 0.930 |
# | a - d | 1.0 | 0.9750 | 0.131 | 0.960 | 0.010 | 0.030 | 0.960 |
# | b - c | 1.0 | 0.8800 | 0.276 | 0.820 | 0.060 | 0.120 | 0.820 |
# | b - d | 1.0 | 0.9250 | 0.193 | 0.860 | 0.010 | 0.130 | 0.860 |
# | c - d | 0.5 | 0.5950 | 0.346 | 0.350 | 0.160 | 0.490 | 0.490 |
# --------------------------------------------------------------------------------------------
# | Conditional dominance |
# --------------------------------------------------------------------------------------------
# | a - b | 1.0 | 0.6300 | 0.458 | 0.580 | 0.320 | 0.100 | 0.580 |
# | a - c | 1.0 | 0.9700 | 0.156 | 0.960 | 0.020 | 0.020 | 0.960 |
# | a - d | 1.0 | 0.9800 | 0.121 | 0.970 | 0.010 | 0.020 | 0.970 |
# | b - c | 1.0 | 0.8850 | 0.283 | 0.840 | 0.070 | 0.090 | 0.840 |
# | b - d | 1.0 | 0.9500 | 0.181 | 0.920 | 0.020 | 0.060 | 0.920 |
# | c - d | 0.5 | 0.5800 | 0.360 | 0.350 | 0.190 | 0.460 | 0.460 |
# --------------------------------------------------------------------------------------------
# | General Dominance |
# --------------------------------------------------------------------------------------------
# | a - b | 1.0 | 0.6500 | 0.479 | 0.650 | 0.350 | 0.000 | 0.650 |
# | a - c | 1.0 | 0.9800 | 0.141 | 0.980 | 0.020 | 0.000 | 0.980 |
# | a - d | 1.0 | 0.9900 | 0.100 | 0.990 | 0.010 | 0.000 | 0.990 |
# | b - c | 1.0 | 0.9000 | 0.302 | 0.900 | 0.100 | 0.000 | 0.900 |
# | b - d | 1.0 | 0.9700 | 0.171 | 0.970 | 0.030 | 0.000 | 0.970 |
# | c - d | 1.0 | 0.5600 | 0.499 | 0.560 | 0.440 | 0.000 | 0.560 |
# --------------------------------------------------------------------------------------------
#
# Table: General averages
# ---------------------------------------
# | var | mean | se | p.5 | p.95 |
# ---------------------------------------
# | a | 0.133 | 0.049 | 0.062 | 0.218 |
# | b | 0.106 | 0.048 | 0.029 | 0.199 |
# | c | 0.035 | 0.032 | 0.002 | 0.106 |
# | d | 0.023 | 0.019 | 0.002 | 0.062 |
# ---------------------------------------
#
# == References:
# * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. <em>Psychological Methods, 8</em>(2), 129-148.
class Bootstrap
include Writable
include Summarizable
# Total Dominance results
attr_reader :samples_td
# Conditional Dominance results
attr_reader :samples_cd
# General Dominance results
attr_reader :samples_gd
# General average results
attr_reader :samples_ga
# Name of fields
attr_reader :fields
# Regression class used for analysis
attr_accessor :regression_class
# Dataset
attr_accessor :ds
# Name of analysis
attr_accessor :name
# Alpha level of confidence. Default: ALPHA
attr_accessor :alpha
# Debug?
attr_accessor :debug
# Default level of confidence for t calculation
ALPHA=0.95
# Create a new Dominance Analysis Bootstrap Object
#
# * ds: A Dataset object
# * y_var: Name of dependent variable
# * opts: Any other attribute of the class
def initialize(ds,y_var, opts=Hash.new)
@ds=ds
@y_var=y_var
@n=ds.cases
@n_samples=0
@alpha=ALPHA
@debug=false
if y_var.is_a? Array
# Several dependents: use the multivariate regression engine.
@fields=ds.fields-y_var
@regression_class=Regression::Multiple::MultipleDependent
else
@fields=ds.fields-[y_var]
@regression_class=Regression::Multiple::MatrixEngine
end
@samples_ga=@fields.inject({}){|a,v| a[v]=[];a}
@name=_("Bootstrap dominance Analysis: %s over %s") % [ ds.fields.join(",") , @y_var]
# Any option whose name matches a writer method is assigned.
opts.each{|k,v|
self.send("#{k}=",v) if self.respond_to? k
}
create_samples_pairs
end
# lr_class deprecated
alias_method :lr_class, :regression_class
# Dominance analysis over the original (non-resampled) dataset,
# memoized on first use.
def da
  @da ||= DominanceAnalysis.new(@ds, @y_var, :regression_class => @regression_class)
end
# Creates n re-samples from original dataset and store result of
# each sample on @samples_td, @samples_cd, @samples_gd, @samples_ga
#
# * number_samples: Number of new samples to add
# * n: size of each new sample. If nil, equal to original sample size
def bootstrap(number_samples,n=nil)
number_samples.times{ |t|
@n_samples+=1
puts _("Bootstrap %d of %d") % [t+1, number_samples] if @debug
# Run a full dominance analysis on each resample and collect the
# pairwise results for every dominance type.
ds_boot=@ds.bootstrap(n)
da_1=DominanceAnalysis.new(ds_boot, @y_var, :regression_class => @regression_class)
da_1.total_dominance.each{|k,v|
@samples_td[k].push(v)
}
da_1.conditional_dominance.each{|k,v|
@samples_cd[k].push(v)
}
da_1.general_dominance.each{|k,v|
@samples_gd[k].push(v)
}
da_1.general_averages.each{|k,v|
@samples_ga[k].push(v)
}
}
end
# Initializes the result containers (@samples_td / @samples_cd /
# @samples_gd) and @pairs: one entry per 2-combination of predictors.
def create_samples_pairs
  @samples_td = {}
  @samples_cd = {}
  @samples_gd = {}
  @pairs = []
  # (A stray `p data` debug print was removed from this loop.)
  (0...@fields.size).to_a.combination(2).each do |data|
    convert = data.collect { |i| @fields[i] }
    @pairs.push(convert)
    [@samples_td, @samples_cd, @samples_gd].each { |s| s[convert] = [] }
  end
end
# Two-tailed critical t value for confidence level +alpha+ with
# @n_samples-1 degrees of freedom.
def t
Distribution::T.p_value(1-((1-@alpha) / 2), @n_samples - 1)
end
def report_building(builder) # :nodoc:
raise "You should bootstrap first" if @n_samples==0
builder.section(:name=>@name) do |generator|
generator.text _("Sample size: %d\n") % @n_samples
generator.text "t: #{t}\n"
generator.text _("Linear Regression Engine: %s") % @regression_class.name
# First table: one block of pairwise rows per dominance type.
table=ReportBuilder::Table.new(:name=>"Bootstrap report", :header => [_("pairs"), "sD","Dij", _("SE(Dij)"), "Pij", "Pji", "Pno", _("Reproducibility")])
table.row([_("Complete dominance"),"","","","","","",""])
table.hr
@pairs.each{|pair|
std=@samples_td[pair].to_vector(:scale)
ttd=da.total_dominance_pairwise(pair[0],pair[1])
table.row(summary_pairs(pair,std,ttd))
}
table.hr
table.row([_("Conditional dominance"),"","","","","","",""])
table.hr
@pairs.each{|pair|
std=@samples_cd[pair].to_vector(:scale)
ttd=da.conditional_dominance_pairwise(pair[0],pair[1])
table.row(summary_pairs(pair,std,ttd))
}
table.hr
table.row([_("General Dominance"),"","","","","","",""])
table.hr
@pairs.each{|pair|
std=@samples_gd[pair].to_vector(:scale)
ttd=da.general_dominance_pairwise(pair[0],pair[1])
table.row(summary_pairs(pair,std,ttd))
}
generator.parse_element(table)
# Second table: bootstrap distribution of the general averages.
table=ReportBuilder::Table.new(:name=>_("General averages"), :header=>[_("var"), _("mean"), _("se"), _("p.5"), _("p.95")])
@fields.each{|f|
v=@samples_ga[f].to_vector(:scale)
row=[@ds[f].name, sprintf("%0.3f",v.mean), sprintf("%0.3f",v.sd), sprintf("%0.3f",v.percentil(5)),sprintf("%0.3f",v.percentil(95))]
table.row(row)
}
generator.parse_element(table)
end
end
def summary_pairs(pair,std,ttd)
# Proportion of bootstrap samples at each dominance level (0, 0.5, 1).
freqs=std.proportions
[0, 0.5, 1].each{|n|
freqs[n]=0 if freqs[n].nil?
}
name="%s - %s" % [@ds[pair[0]].name, @ds[pair[1]].name]
# Columns: pair, sample dominance, mean Dij, SE(Dij), Pij, Pji, Pno,
# reproducibility (frequency of the full-sample result).
[name,f(ttd,1),f(std.mean,4),f(std.sd),f(freqs[1]), f(freqs[0]), f(freqs[0.5]), f(freqs[ttd])]
end
# Formats +v+ with +n+ decimal places (default 3).
def f(v, n=3)
  sprintf("%0.#{n}f", v)
end
end
end
end
================================================
FILE: lib/statsample/dominanceanalysis.rb
================================================
module Statsample
# Dominance Analysis is a procedure based on an examination of the R<sup>2</sup> values
# for all possible subset models, to identify the relevance of one or more
# predictors in the prediction of criterium.
#
# See Budescu(1993), Azen & Budescu (2003, 2006) for more information.
#
# == Use
#
# a=1000.times.collect {rand}.to_scale
# b=1000.times.collect {rand}.to_scale
# c=1000.times.collect {rand}.to_scale
# ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
# ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+rand()}
# da=Statsample::DominanceAnalysis.new(ds,'y')
# puts da.summary
#
# === Output:
#
# Report: Report 2010-02-08 19:10:11 -0300
# Table: Dominance Analysis result
# ------------------------------------------------------------
# | | r2 | sign | a | b | c |
# ------------------------------------------------------------
# | Model 0 | | | 0.648 | 0.265 | 0.109 |
# ------------------------------------------------------------
# | a | 0.648 | 0.000 | -- | 0.229 | 0.104 |
# | b | 0.265 | 0.000 | 0.612 | -- | 0.104 |
# | c | 0.109 | 0.000 | 0.643 | 0.260 | -- |
# ------------------------------------------------------------
# | k=1 Average | | | 0.627 | 0.244 | 0.104 |
# ------------------------------------------------------------
# | a*b | 0.877 | 0.000 | -- | -- | 0.099 |
# | a*c | 0.752 | 0.000 | -- | 0.224 | -- |
# | b*c | 0.369 | 0.000 | 0.607 | -- | -- |
# ------------------------------------------------------------
# | k=2 Average | | | 0.607 | 0.224 | 0.099 |
# ------------------------------------------------------------
# | a*b*c | 0.976 | 0.000 | -- | -- | -- |
# ------------------------------------------------------------
# | Overall averages | | | 0.628 | 0.245 | 0.104 |
# ------------------------------------------------------------
#
# Table: Pairwise dominance
# -----------------------------------------
# | Pairs | Total | Conditional | General |
# -----------------------------------------
# | a - b | 1.0 | 1.0 | 1.0 |
# | a - c | 1.0 | 1.0 | 1.0 |
# | b - c | 1.0 | 1.0 | 1.0 |
# -----------------------------------------
#
# == Reference:
# * Budescu, D. V. (1993). Dominance analysis: a new approach to the problem of relative importance of predictors in multiple regression. <em>Psychological Bulletin, 114</em>, 542-551.
# * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. <em>Psychological Methods, 8</em>(2), 129-148.
# * Azen, R. & Budescu, D.V. (2006). Comparing predictors in Multivariate Regression Models: An extension of Dominance Analysis. <em>Journal of Educational and Behavioral Statistics, 31</em>(2), 157-180.
#
class DominanceAnalysis
  include Summarizable
  # Class to generate the regressions. Default to Statsample::Regression::Multiple::MatrixEngine
  attr_accessor :regression_class
  # Name of analysis
  attr_accessor :name
  # Set to true if you want to build from dataset, not correlation matrix
  attr_accessor :build_from_dataset
  # Array with independent variables. You could create subarrays,
  # to test groups of predictors as blocks
  attr_accessor :predictors
  # If you provide a matrix as input, you should set
  # the number of cases to define significance of R^2
  attr_accessor :cases
  # Method of :regression_class used to measure association.
  #
  # Only necessary to change if you have multivariate dependent.
  # * :r2yx (R^2_yx), the default option, is the option when distinction
  #   between independent and dependents variable is arbitrary
  # * :p2yx is the option when the distinction between independent and dependents variables is real.
  #
  attr_accessor :method_association
  # Dependent variable name(s). Always stored as an Array (a scalar
  # dependent is wrapped on initialization).
  attr_reader :dependent
  UNIVARIATE_REGRESSION_CLASS=Statsample::Regression::Multiple::MatrixEngine
  MULTIVARIATE_REGRESSION_CLASS=Statsample::Regression::Multiple::MultipleDependent
  # Printable name for a predictor. Grouped predictors (Arrays) are
  # rendered as "(a,b)"; a single predictor is returned unchanged.
  def self.predictor_name(variable)
    if variable.is_a? Array
      sprintf("(%s)", variable.join(","))
    else
      variable
    end
  end
  # Creates a new DominanceAnalysis object
  # Parameters:
  # * input: A Matrix or Dataset object
  # * dependent: Name of dependent variable. Could be an array, if you want to
  #   do an Multivariate Regression Analysis. If nil, set to all
  #   fields on input, except criteria
  def initialize(input, dependent, opts=Hash.new)
    @build_from_dataset=false
    # An Array dependent selects the multivariate engine and its
    # association measure; a scalar dependent uses plain R^2.
    if dependent.is_a? Array
      @regression_class= MULTIVARIATE_REGRESSION_CLASS
      @method_association=:r2yx
    else
      @regression_class= UNIVARIATE_REGRESSION_CLASS
      @method_association=:r2
    end
    @name=nil
    # Options may override any writable attribute (regression_class,
    # predictors, cases, ...). Unknown keys are ignored.
    opts.each{|k,v|
      self.send("#{k}=",v) if self.respond_to? k
    }
    @dependent=dependent
    @dependent=[@dependent] unless @dependent.is_a? Array
    @predictors ||= input.fields-@dependent
    @name=_("Dominance Analysis: %s over %s") % [ @predictors.flatten.join(",") , @dependent.join(",")] if @name.nil?
    if input.is_a? Statsample::Dataset
      @ds=input
      @matrix=Statsample::Bivariate.correlation_matrix(input)
      @cases=Statsample::Bivariate.min_n_valid(input)
    elsif input.is_a? ::Matrix
      @ds=nil
      @matrix=input
    else
      raise ArgumentError.new("You should use a Matrix or a Dataset")
    end
    @models=nil
    @models_data=nil
    @general_averages=nil
  end
  # Compute models.
  def compute
    create_models
    fill_models
  end
  # Every analyzed predictor subset, computed lazily on first access.
  def models
    compute if @models.nil?
    @models
  end
  # Hash of ModelData objects, keyed by canonically sorted predictor
  # subset. Computed lazily on first access.
  def models_data
    compute if @models_data.nil?
    @models_data
  end
  # Builds one ModelData for every non-empty combination of predictors,
  # sliced either from the dataset or from the correlation matrix.
  def create_models
    @models=[]
    @models_data={}
    for i in 1..@predictors.size
      combinations=(0...@predictors.size).to_a.combination(i)
      combinations.each do |indexes|
        independent=indexes.collect {|i1| @predictors[i1] }
        @models.push(independent)
        # FIX: use a fresh name instead of reassigning the block
        # parameter (the original shadowed and clobbered +data+).
        if (@build_from_dataset)
          model_input=@ds.dup(independent.flatten+@dependent)
        else
          model_input=@matrix.submatrix(independent.flatten+@dependent)
        end
        modeldata=ModelData.new(independent, model_input, self)
        # Key by a sorted copy so lookups are order-independent (see #md).
        models_data[independent.sort {|a,b| a.to_s<=>b.to_s}]=modeldata
      end
    end
  end
  # For each model, registers the additional contribution (delta R^2)
  # of every predictor not already included in it.
  def fill_models
    @models.each do |m|
      @predictors.each do |f|
        next if m.include? f
        base_model=md(m)
        comp_model=md(m+[f])
        base_model.add_contribution(f,comp_model.r2)
      end
    end
  end
  private :create_models, :fill_models
  # Dominance of i over j on the null model, comparing single-predictor
  # R^2: 1 if i wins, 0 if j wins, 0.5 on a tie.
  def dominance_for_nil_model(i,j)
    if md([i]).r2>md([j]).r2
      1
    elsif md([i]).r2<md([j]).r2
      0
    else
      0.5
    end
  end
  # Returns 1 if i totally dominates j, 0 if j totally dominates i and
  # 0.5 if dominance is undetermined (contributions disagree across models).
  def total_dominance_pairwise(i,j)
    dm=dominance_for_nil_model(i,j)
    return 0.5 if dm==0.5
    dominances=[dm]
    models_data.each do |k,m|
      if !m.contributions[i].nil? and !m.contributions[j].nil?
        if m.contributions[i]>m.contributions[j]
          dominances.push(1)
        elsif m.contributions[i]<m.contributions[j]
          dominances.push(0)
        else
          # A single tie is enough to leave total dominance undetermined.
          return 0.5
        end
      end
    end
    final=dominances.uniq
    final.size>1 ? 0.5 : final[0]
  end
  # Returns 1 if i conditionally dominates j, 0 if j conditionally
  # dominates i and 0.5 if undetermined. Compares average contributions
  # within each model size k.
  def conditional_dominance_pairwise(i,j)
    dm=dominance_for_nil_model(i,j)
    return 0.5 if dm==0.5
    dominances=[dm]
    for k in 1...@predictors.size
      a=average_k(k)
      if a[i]>a[j]
        dominances.push(1)
      elsif a[i]<a[j]
        dominances.push(0)
      else
        return 0.5
      end
    end
    final=dominances.uniq
    final.size>1 ? 0.5 : final[0]
  end
  # Returns 1 if i generally dominates j, 0 if j generally dominates i
  # and 0.5 if undetermined. Compares overall average contributions.
  def general_dominance_pairwise(i,j)
    ga=general_averages
    if ga[i]>ga[j]
      1
    elsif ga[i]<ga[j]
      0
    else
      0.5
    end
  end
  # All two-predictor models, i.e. the pairs compared for dominance.
  def pairs
    models.find_all{|m| m.size==2}
  end
  # Hash pair => total dominance value (1, 0 or 0.5).
  def total_dominance
    pairs.inject({}){|a,pair| a[pair]=total_dominance_pairwise(pair[0], pair[1])
      a
    }
  end
  # Hash pair => conditional dominance value (1, 0 or 0.5).
  def conditional_dominance
    pairs.inject({}){|a,pair| a[pair]=conditional_dominance_pairwise(pair[0], pair[1])
      a
    }
  end
  # Hash pair => general dominance value (1, 0 or 0.5).
  def general_dominance
    pairs.inject({}){|a,pair| a[pair]=general_dominance_pairwise(pair[0], pair[1])
      a
    }
  end
  # ModelData for predictor subset +m+, regardless of element order.
  def md(m)
    models_data[m.sort {|a,b| a.to_s<=>b.to_s}]
  end
  # Get all model of size k
  def md_k(k)
    out=[]
    @models.each{|m| out.push(md(m)) if m.size==k }
    out
  end
  # For a hash with arrays of numbers as values
  # Returns a hash with same keys and
  # value as the mean of values of original hash
  def get_averages(averages)
    out={}
    averages.each{|key,val| out[key]=val.to_vector(:scale).mean }
    out
  end
  # Hash with average contribution of each predictor over all models
  # of size k. Returns nil for the full model (k == number of predictors).
  def average_k(k)
    return nil if k==@predictors.size
    models=md_k(k)
    averages=@predictors.inject({}) {|a,v| a[v]=[];a}
    models.each do |m|
      @predictors.each do |f|
        averages[f].push(m.contributions[f]) unless m.contributions[f].nil?
      end
    end
    get_averages(averages)
  end
  # Overall average contribution of each predictor: mean of its
  # single-predictor R^2 and its k-averages for every model size.
  def general_averages
    if @general_averages.nil?
      averages=@predictors.inject({}) {|a,v| a[v]=[md([v]).r2];a}
      for k in 1...@predictors.size
        ak=average_k(k)
        @predictors.each do |f|
          averages[f].push(ak[f])
        end
      end
      @general_averages=get_averages(averages)
    end
    @general_averages
  end
  # Builds the summary report: the main dominance-analysis table and
  # the pairwise dominance table.
  def report_building(g)
    compute if @models.nil?
    g.section(:name=>@name) do |generator|
      header=["","r2",_("sign")]+@predictors.collect {|c| DominanceAnalysis.predictor_name(c) }
      generator.table(:name=>_("Dominance Analysis result"), :header=>header) do |t|
        row=[_("Model 0"),"",""]+@predictors.collect{|f|
          sprintf("%0.3f",md([f]).r2)
        }
        t.row(row)
        t.hr
        for i in 1..@predictors.size
          mk=md_k(i)
          mk.each{|m|
            t.row(m.add_table_row)
          }
          # Report averages
          a=average_k(i)
          if !a.nil?
            t.hr
            row=[_("k=%d Average") % i,"",""] + @predictors.collect{|f|
              sprintf("%0.3f",a[f])
            }
            t.row(row)
            t.hr
          end
        end
        # FIX: keep the overall averages in their own variable instead
        # of clobbering the builder parameter +g+ as the original did.
        ga=general_averages
        t.hr
        row=[_("Overall averages"),"",""]+@predictors.collect{|f|
          sprintf("%0.3f",ga[f])
        }
        t.row(row)
      end
      td=total_dominance
      cd=conditional_dominance
      gd=general_dominance
      generator.table(:name=>_("Pairwise dominance"), :header=>[_("Pairs"),_("Total"),_("Conditional"),_("General")]) do |t|
        pairs.each{|pair|
          name=pair.map{|v| v.is_a?(Array) ? "("+v.join("-")+")" : v}.join(" - ")
          row=[name, sprintf("%0.1f",td[pair]), sprintf("%0.1f",cd[pair]), sprintf("%0.1f",gd[pair])]
          t.row(row)
        }
      end
    end
  end
  # Holds one regression model (a predictor subset) and the extra
  # contribution of every predictor added to it.
  class ModelData # :nodoc:
    # Hash predictor => delta R^2 when added to this model (nil if the
    # predictor already belongs to the model).
    attr_reader :contributions
    # independent: predictor subset; data: Dataset or submatrix slice;
    # da: owning DominanceAnalysis (supplies engine and options).
    def initialize(independent, data, da)
      @independent=independent
      @data=data
      @predictors=da.predictors
      @dependent=da.dependent
      @cases=da.cases
      @method=da.method_association
      @contributions=@independent.inject({}){|a,v| a[v]=nil;a}
      r_class=da.regression_class
      if @dependent.size==1
        @lr=r_class.new(data, @dependent[0], :cases=>@cases)
      else
        @lr=r_class.new(data, @dependent, :cases=>@cases)
      end
    end
    # Stores the delta association of predictor +f+, given the full
    # model association +v+.
    def add_contribution(f, v)
      @contributions[f]=v-r2
    end
    # Association measure of this model (R^2 or R^2_yx / P^2_yx).
    def r2
      @lr.send(@method)
    end
    # Model label, e.g. "a*b" or "(a,b)*c" for grouped predictors.
    def name
      @independent.collect {|variable|
        DominanceAnalysis.predictor_name(variable)
      }.join("*")
    end
    # Row for the main report table: name, R^2, significance and the
    # contribution (or "--") of every predictor.
    def add_table_row
      if @cases
        sign=sprintf("%0.3f", @lr.probability)
      else
        # Significance needs the number of cases; unknown for bare matrices.
        sign="???"
      end
      [name, sprintf("%0.3f",r2), sign] + @predictors.collect{|k|
        v=@contributions[k]
        if v.nil?
          "--"
        else
          sprintf("%0.3f",v)
        end
      }
    end
    # One-line text summary of the model and its contributions.
    def summary
      # FIX: the format string has three conversions, but the original
      # passed a fourth argument (@lr.sst) that was never rendered and
      # raises ArgumentError when $DEBUG is enabled.
      out=sprintf("%s: r2=%0.3f(p=%0.2f)\n",name, r2, @lr.significance)
      out << @predictors.collect{|k|
        v=@contributions[k]
        if v.nil?
          "--"
        else
          sprintf("%s=%0.3f",k,v)
        end
      }.join(" | ")
      out << "\n"
      return out
    end
  end # end ModelData
end # end Dominance Analysis
end
require 'statsample/dominanceanalysis/bootstrap'
================================================
FILE: lib/statsample/factor/map.rb
================================================
module Statsample
module Factor
# = Velicer's Minimum Average Partial
#
# "Velicer’s (1976) MAP test involves a complete princi-
# pal components analysis followed by the examination of
# a series of matrices of partial correlations. Specifically,
# on the first step, the first principal component is par-
# tialed out of the correlations between the variables of in-
# terest, and the average squared coefficient in the off-
# diagonals of the resulting partial correlation matrix is
# computed. On the second step, the first two principal
# components are partialed out of the original correlation
# matrix and the average squared partial correlation is
# again computed. These computations are conducted for k
# (the number of variables) minus one steps. The average
# squared partial correlations from these steps are then
# lined up, and the number of components is determined by
# the step number in the analyses that resulted in the lowest
# average squared partial correlation. The average squared
# coefficient in the original correlation matrix is also com-
# puted, and if this coefficient happens to be lower than
# the lowest average squared partial correlation, then no
# components should be extracted from the correlation ma-
# trix. Statistically, components are retained as long as the
# variance in the correlation matrix represents systematic
# variance. Components are no longer retained when there
# is proportionately more unsystematic variance than sys-
# tematic variance." (O'Connor, 2000, p.397).
#
# Current algorithm is loosely based on SPSS O'Connor algorithm
#
# == Reference
# * O'Connor, B. (2000). SPSS and SAS programs for determining the number of components using parallel analysis and Velicer's MAP test. Behavior Research Methods, Instruments, & Computers, 32(3), 396-402.
#
class MAP
  include Summarizable
  include DirtyMemoize
  # Name of analysis
  attr_accessor :name
  # Eigenvalues of the input matrix, computed by #compute.
  attr_reader :eigenvalues
  # Number of factors to retain
  attr_reader :number_of_factors
  # Average squared correlations
  attr_reader :fm
  # Smallest average squared correlation
  attr_reader :minfm
  # When true, convert the matrix to GSL for the eigen decomposition
  # (only takes effect if GSL support is actually available).
  attr_accessor :use_gsl
  # Convenience constructor: runs MAP on the correlation matrix of +ds+.
  def self.with_dataset(ds,opts=Hash.new)
    new(ds.correlation_matrix,opts)
  end
  # matrix: the correlation matrix to analyze.
  # opts may override :use_gsl (default true) and :name.
  def initialize(matrix, opts=Hash.new)
    @matrix=matrix
    opts_default={
      :use_gsl=>true,
      :name=>_("Velicer's MAP")
    }
    @opts=opts_default.merge(opts)
    # Assign only the known option keys through their writers.
    opts_default.keys.each {|k| send("#{k}=", @opts[k]) }
  end
  # Runs the MAP algorithm:
  # 1. Eigen-decompose the matrix and form component loadings
  #    (eigenvectors scaled by sqrt of eigenvalues).
  # 2. fm[0] is the average squared off-diagonal element of the original
  #    matrix (mssq sums all squared elements; subtracting ncol removes
  #    the unit diagonal).
  # 3. For each step m, partial the first m+1 components out of the
  #    matrix, rescale to a correlation metric, and store the average
  #    squared off-diagonal partial correlation in fm[m+1].
  # 4. Retain the number of components whose step gives the smallest
  #    average squared partial correlation.
  # Sets @eigenvalues, @fm, @minfm, @number_of_factors and @errors.
  def compute
    # Duck-typed: gsl_m is either a GSL matrix or a stdlib-extended
    # Matrix; klass_m lets us build a diagonal of the matching class.
    gsl_m=(use_gsl and Statsample.has_gsl?) ? @matrix.to_gsl : @matrix
    klass_m=gsl_m.class
    eigvect,@eigenvalues=gsl_m.eigenvectors_matrix, gsl_m.eigenvalues
    eigenvalues_sqrt=@eigenvalues.collect {|v| Math.sqrt(v)}
    loadings=eigvect*(klass_m.diagonal(*eigenvalues_sqrt))
    fm=Array.new(@matrix.row_size)
    ncol=@matrix.column_size
    fm[0]=(gsl_m.mssq - ncol).quo(ncol*(ncol-1))
    (ncol-1).times do |m|
      puts "MAP:Eigenvalue #{m+1}" if $DEBUG
      # First m+1 columns of the loading matrix.
      a=loadings[0..(loadings.row_size-1),0..m]
      # Partial covariance after removing those components.
      partcov= gsl_m - (a*a.transpose)
      # Rescale to partial correlations: D^{-1/2} * partcov * D^{-1/2}.
      d=klass_m.diagonal(*(partcov.diagonal.collect {|v| Math::sqrt(1/v)}))
      pr=d*partcov*d
      fm[m+1]=(pr.mssq-ncol).quo(ncol*(ncol-1))
    end
    minfm=fm[0]
    nfactors=0
    # Steps that produced a Complex value (numerically degenerate) are
    # recorded in @errors and excluded from the minimum search.
    @errors=[]
    fm.each_with_index do |v,s|
      if defined?(Complex) and v.is_a? ::Complex
        @errors.push(s)
      else
        if v < minfm
          minfm=v
          nfactors=s
        end
      end
    end
    @number_of_factors=nfactors
    @fm=fm
    @minfm=minfm
  end
  # Renders eigenvalues, the average squared correlations per step and
  # the retained number of components. Steps flagged in @errors show "*".
  def report_building(g) #:nodoc:
    g.section(:name=>@name) do |s|
      s.table(:name=>_("Eigenvalues"),:header=>[_("Value")]) do |t|
        eigenvalues.each_with_index do |e,i|
          t.row([@errors.include?(i) ? "*" : "%0.6f" % e])
        end
      end
      s.table(:name=>_("Velicer's Average Squared Correlations"), :header=>[_("number of components"),_("average square correlation")]) do |t|
        fm.each_with_index do |v,i|
          t.row(["%d" % i, @errors.include?(i) ? "*" : "%0.6f" % v])
        end
      end
      s.text(_("The smallest average squared correlation is : %0.6f" % minfm))
      s.text(_("The number of components is : %d" % number_of_factors))
    end
  end
  # Accessing these readers triggers #compute on first use.
  dirty_memoize :number_of_factors, :fm, :minfm, :eigenvalues
end
end
end
================================================
FILE: lib/statsample/factor/parallelanalysis.rb
================================================
module Statsample
module Factor
# Performs Horn's 'parallel analysis' to a principal components analysis
# to adjust for sample bias in the retention of components.
# Can create the bootstrap samples using random data, using number
# of cases and variables, parameters for actual data (mean and standard
# deviation of each variable) or bootstrap sampling for actual data.
# == Description
# "PA involves the construction of a number of correlation matrices of random variables based on the same sample size and number of variables in the real data set. The average eigenvalues from the random correlation matrices are then compared to the eigenvalues from the real data correlation matrix, such that the first observed eigenvalue is compared to the first random eigenvalue, the second observed eigenvalue is compared to the second random eigenvalue, and so on." (Hayton, Allen & Scarpello, 2004, p.194)
# == Usage
# *With real dataset*
# # ds should be any valid dataset
# pa=Statsample::Factor::ParallelAnalysis.new(ds, :iterations=>100, :bootstrap_method=>:data)
#
# *With number of cases and variables*
# pa=Statsample::Factor::ParallelAnalysis.with_random_data(100,8)
#
# == Reference
# * Hayton, J., Allen, D. & Scarpello, V.(2004). Factor Retention Decisions in Exploratory Factor Analysis: a Tutorial on Parallel Analysis. <i>Organizational Research Methods, 7</i> (2), 191-205.
# * O'Connor, B. (2000). SPSS and SAS programs for determining the number of components using parallel analysis and Velicer's MAP test. Behavior Research Methods, Instruments, & Computers, 32(3), 396-402.
# * Liu, O., & Rijmen, F. (2008). A modified procedure for parallel analysis of ordered categorical data. Behavior Research Methods, 40(2), 556-562.
class ParallelAnalysis
def self.with_random_data(cases,vars,opts=Hash.new)
require 'ostruct'
ds=OpenStruct.new
ds.fields=vars.times.map {|i| "v#{i+1}"}
ds.cases=cases
opts=opts.merge({:bootstrap_method=> :random, :no_data=>true})
new(ds, opts)
end
include DirtyMemoize
include Summarizable
# Number of random sets to produce. 50 by default
attr_accessor :iterations
# Name of analysis
at
gitextract_b74amxs6/
├── .gitignore
├── .travis.yml
├── Gemfile
├── History.txt
├── LICENSE.txt
├── Manifest.txt
├── README.md
├── Rakefile
├── benchmarks/
│ ├── correlation_matrix_15_variables.rb
│ ├── correlation_matrix_5_variables.rb
│ ├── correlation_matrix_methods/
│ │ ├── correlation_matrix.ds
│ │ ├── correlation_matrix.html
│ │ ├── correlation_matrix.rb
│ │ ├── correlation_matrix.xls
│ │ ├── correlation_matrix_gsl_ruby.ods
│ │ ├── correlation_matrix_with_graphics.ods
│ │ └── results.ds
│ ├── factor_map.rb
│ └── helpers_benchmark.rb
├── data/
│ └── locale/
│ └── es/
│ └── LC_MESSAGES/
│ └── statsample.mo
├── doc_latex/
│ └── manual/
│ └── equations.tex
├── examples/
│ ├── boxplot.rb
│ ├── correlation_matrix.rb
│ ├── dataset.rb
│ ├── dominance_analysis.rb
│ ├── dominance_analysis_bootstrap.rb
│ ├── histogram.rb
│ ├── icc.rb
│ ├── levene.rb
│ ├── multiple_regression.rb
│ ├── multivariate_correlation.rb
│ ├── parallel_analysis.rb
│ ├── polychoric.rb
│ ├── principal_axis.rb
│ ├── reliability.rb
│ ├── scatterplot.rb
│ ├── t_test.rb
│ ├── tetrachoric.rb
│ ├── u_test.rb
│ ├── vector.rb
│ └── velicer_map_test.rb
├── grab_references.rb
├── lib/
│ ├── spss.rb
│ ├── statsample/
│ │ ├── analysis/
│ │ │ ├── suite.rb
│ │ │ └── suitereportbuilder.rb
│ │ ├── analysis.rb
│ │ ├── anova/
│ │ │ ├── contrast.rb
│ │ │ ├── oneway.rb
│ │ │ └── twoway.rb
│ │ ├── anova.rb
│ │ ├── bivariate/
│ │ │ └── pearson.rb
│ │ ├── bivariate.rb
│ │ ├── codification.rb
│ │ ├── converter/
│ │ │ ├── csv.rb
│ │ │ └── spss.rb
│ │ ├── converters.rb
│ │ ├── crosstab.rb
│ │ ├── dataset.rb
│ │ ├── dominanceanalysis/
│ │ │ └── bootstrap.rb
│ │ ├── dominanceanalysis.rb
│ │ ├── factor/
│ │ │ ├── map.rb
│ │ │ ├── parallelanalysis.rb
│ │ │ ├── pca.rb
│ │ │ ├── principalaxis.rb
│ │ │ └── rotation.rb
│ │ ├── factor.rb
│ │ ├── graph/
│ │ │ ├── boxplot.rb
│ │ │ ├── histogram.rb
│ │ │ └── scatterplot.rb
│ │ ├── graph.rb
│ │ ├── histogram.rb
│ │ ├── matrix.rb
│ │ ├── multiset.rb
│ │ ├── regression/
│ │ │ ├── multiple/
│ │ │ │ ├── alglibengine.rb
│ │ │ │ ├── baseengine.rb
│ │ │ │ ├── gslengine.rb
│ │ │ │ ├── matrixengine.rb
│ │ │ │ └── rubyengine.rb
│ │ │ ├── multiple.rb
│ │ │ └── simple.rb
│ │ ├── regression.rb
│ │ ├── reliability/
│ │ │ ├── icc.rb
│ │ │ ├── multiscaleanalysis.rb
│ │ │ ├── scaleanalysis.rb
│ │ │ └── skillscaleanalysis.rb
│ │ ├── reliability.rb
│ │ ├── resample.rb
│ │ ├── rserve_extension.rb
│ │ ├── shorthand.rb
│ │ ├── srs.rb
│ │ ├── test/
│ │ │ ├── bartlettsphericity.rb
│ │ │ ├── chisquare.rb
│ │ │ ├── f.rb
│ │ │ ├── kolmogorovsmirnov.rb
│ │ │ ├── levene.rb
│ │ │ ├── t.rb
│ │ │ ├── umannwhitney.rb
│ │ │ └── wilcoxonsignedrank.rb
│ │ ├── test.rb
│ │ ├── vector/
│ │ │ └── gsl.rb
│ │ ├── vector.rb
│ │ └── version.rb
│ └── statsample.rb
├── po/
│ ├── es/
│ │ ├── statsample.mo
│ │ └── statsample.po
│ └── statsample.pot
├── references.txt
├── setup.rb
├── test/
│ ├── fixtures/
│ │ ├── correlation_matrix.rb
│ │ ├── hartman_23.matrix
│ │ ├── repeated_fields.csv
│ │ ├── stock_data.csv
│ │ ├── test_csv.csv
│ │ ├── test_xls.xls
│ │ ├── tetmat_matrix.txt
│ │ └── tetmat_test.txt
│ ├── helpers_tests.rb
│ ├── test_analysis.rb
│ ├── test_anova_contrast.rb
│ ├── test_anovaoneway.rb
│ ├── test_anovatwoway.rb
│ ├── test_anovatwowaywithdataset.rb
│ ├── test_anovawithvectors.rb
│ ├── test_awesome_print_bug.rb
│ ├── test_bartlettsphericity.rb
│ ├── test_bivariate.rb
│ ├── test_codification.rb
│ ├── test_crosstab.rb
│ ├── test_csv.rb
│ ├── test_dataset.rb
│ ├── test_dominance_analysis.rb
│ ├── test_factor.rb
│ ├── test_factor_map.rb
│ ├── test_factor_pa.rb
│ ├── test_ggobi.rb
│ ├── test_gsl.rb
│ ├── test_histogram.rb
│ ├── test_matrix.rb
│ ├── test_multiset.rb
│ ├── test_regression.rb
│ ├── test_reliability.rb
│ ├── test_reliability_icc.rb
│ ├── test_reliability_skillscale.rb
│ ├── test_resample.rb
│ ├── test_rserve_extension.rb
│ ├── test_srs.rb
│ ├── test_statistics.rb
│ ├── test_stest.rb
│ ├── test_stratified.rb
│ ├── test_test_f.rb
│ ├── test_test_kolmogorovsmirnov.rb
│ ├── test_test_t.rb
│ ├── test_umannwhitney.rb
│ ├── test_vector.rb
│ ├── test_wilcoxonsignedrank.rb
│ └── test_xls.rb
└── web/
└── Rakefile
SYMBOL INDEX (1478 symbols across 104 files)
FILE: benchmarks/correlation_matrix_methods/correlation_matrix.rb
function create_dataset (line 7) | def create_dataset(vars,cases)
function prediction_pairwise (line 15) | def prediction_pairwise(vars,cases)
function prediction_optimized (line 18) | def prediction_optimized(vars,cases)
FILE: lib/spss.rb
type SPSS (line 9) | module SPSS # :nodoc: all
type Dictionary (line 10) | module Dictionary
class Element (line 11) | class Element
method add (line 12) | def add(a)
method parse_elements (line 15) | def parse_elements(func=:to_s)
method init_with (line 18) | def init_with config
method initialize (line 23) | def initialize(config={})
class Dictionary (line 28) | class Dictionary < Element
method initialize (line 30) | def initialize(config={})
method to_xml (line 40) | def to_xml
method to_spss (line 44) | def to_spss
class MissingValue (line 49) | class MissingValue < Element
method initialize (line 51) | def initialize(data,type=nil)
method to_xml (line 59) | def to_xml
class LabelSet (line 63) | class LabelSet
method initialize (line 65) | def initialize(labels)
method parse_xml (line 68) | def parse_xml(name)
method parse_spss (line 71) | def parse_spss()
class Variable (line 75) | class Variable < Element
method initialize (line 77) | def initialize(config={})
method to_xml (line 96) | def to_xml
method to_spss (line 101) | def to_spss
FILE: lib/statsample.rb
class Numeric (line 29) | class Numeric
method square (line 30) | def square ; self * self ; end
class String (line 33) | class String
method is_number? (line 34) | def is_number?
class Module (line 43) | class Module
method include_aliasing (line 44) | def include_aliasing(m, suffix="ruby")
class Array (line 55) | class Array
method recode_repeated (line 62) | def recode_repeated
function create_test (line 82) | def create_test(*args,&proc)
function bindtextdomain (line 92) | def bindtextdomain(d) #:nodoc:
type GetText (line 97) | module GetText #:nodoc:
function _ (line 98) | def _(t)
type Statsample (line 117) | module Statsample
function create_has_library (line 119) | def self.create_has_library(library)
function load (line 166) | def load(filename)
function vector_cols_matrix (line 182) | def vector_cols_matrix(*vs)
function only_valid (line 204) | def only_valid(*vs)
function only_valid_clone (line 214) | def only_valid_clone(*vs)
type Util (line 226) | module Util
function normal_order_statistic_medians (line 228) | def normal_order_statistic_medians(i,n)
function nice (line 239) | def self.nice(s,e) # :nodoc:
type Writable (line 257) | module Writable
function save (line 258) | def save(filename)
type Summarizable (line 265) | module Summarizable
function summary (line 268) | def summary(method=:to_text)
type STATSAMPLE__ (line 272) | module STATSAMPLE__ #:nodoc:
type Statsample (line 282) | module Statsample
function create_has_library (line 119) | def self.create_has_library(library)
function load (line 166) | def load(filename)
function vector_cols_matrix (line 182) | def vector_cols_matrix(*vs)
function only_valid (line 204) | def only_valid(*vs)
function only_valid_clone (line 214) | def only_valid_clone(*vs)
type Util (line 226) | module Util
function normal_order_statistic_medians (line 228) | def normal_order_statistic_medians(i,n)
function nice (line 239) | def self.nice(s,e) # :nodoc:
type Writable (line 257) | module Writable
function save (line 258) | def save(filename)
type Summarizable (line 265) | module Summarizable
function summary (line 268) | def summary(method=:to_text)
type STATSAMPLE__ (line 272) | module STATSAMPLE__ #:nodoc:
FILE: lib/statsample/analysis.rb
type Statsample (line 4) | module Statsample
type Analysis (line 32) | module Analysis
function clear_analysis (line 35) | def self.clear_analysis
function stored_analysis (line 38) | def self.stored_analysis
function last (line 41) | def self.last
function store (line 44) | def self.store(name, opts=Hash.new,&block)
function run (line 53) | def self.run(*args)
function add_to_reportbuilder (line 66) | def self.add_to_reportbuilder(rb, *args)
function save (line 79) | def self.save(filename, *args)
function to_text (line 89) | def self.to_text(*args)
function run_batch (line 96) | def self.run_batch(*args)
FILE: lib/statsample/analysis/suite.rb
type Statsample (line 1) | module Statsample
type Analysis (line 2) | module Analysis
class Suite (line 3) | class Suite
method initialize (line 8) | def initialize(opts=Hash.new(), &block)
method run (line 19) | def run
method desc (line 24) | def desc(d)
method echo (line 28) | def echo(*args)
method summary (line 31) | def summary(obj)
method add_to_reportbuilder (line 34) | def add_to_reportbuilder(rb)
method generate (line 38) | def generate(filename)
method to_text (line 42) | def to_text
method attach (line 47) | def attach(ds)
method detach (line 50) | def detach(ds=nil)
method show_svg (line 61) | def show_svg(svg)
method boxplot (line 71) | def boxplot(*args)
method histogram (line 74) | def histogram(*args)
method scatterplot (line 77) | def scatterplot(*args)
method method_missing (line 81) | def method_missing(name, *args,&block)
FILE: lib/statsample/analysis/suitereportbuilder.rb
type Statsample (line 1) | module Statsample
type Analysis (line 2) | module Analysis
class SuiteReportBuilder (line 3) | class SuiteReportBuilder < Suite
method initialize (line 5) | def initialize(opts=Hash.new,&block)
method generate (line 12) | def generate(filename)
method to_text (line 16) | def to_text
method summary (line 20) | def summary(o)
method desc (line 23) | def desc(d)
method echo (line 26) | def echo(*args)
method boxplot (line 32) | def boxplot(*args)
method histogram (line 35) | def histogram(*args)
method boxplot (line 38) | def boxplot(*args)
FILE: lib/statsample/anova.rb
type Statsample (line 1) | module Statsample
type Anova (line 2) | module Anova
function oneway (line 4) | def oneway(*args)
function twoway (line 7) | def twoway(*args)
function oneway_with_vectors (line 11) | def oneway_with_vectors(*args)
function twoway_with_vectors (line 14) | def twoway_with_vectors(*args)
FILE: lib/statsample/anova/contrast.rb
type Statsample (line 1) | module Statsample
type Anova (line 2) | module Anova
class Contrast (line 3) | class Contrast
method initialize (line 8) | def initialize(opts=Hash.new)
method c_by_index (line 23) | def c_by_index(c1,c2)
method psi (line 30) | def psi
method confidence_interval (line 37) | def confidence_interval(cl=nil)
method c (line 44) | def c(args=nil)
method standard_error (line 52) | def standard_error
method df (line 59) | def df
method t_object (line 62) | def t_object
method t (line 65) | def t
method probability (line 68) | def probability
method report_building (line 71) | def report_building(builder)
FILE: lib/statsample/anova/oneway.rb
type Statsample (line 1) | module Statsample
type Anova (line 2) | module Anova
class OneWay (line 8) | class OneWay
method initialize (line 15) | def initialize(opts=Hash.new)
method f (line 46) | def f
method probability (line 50) | def probability
method report_building (line 53) | def report_building(builder) #:nodoc:
method report_building_table (line 58) | def report_building_table(builder) #:nodoc:
class OneWayWithVectors (line 81) | class OneWayWithVectors < OneWay
method initialize (line 91) | def initialize(*args)
method contrast (line 120) | def contrast(opts=Hash.new)
method levene (line 128) | def levene
method total_mean (line 132) | def total_mean
method sswg (line 137) | def sswg
method ssbg (line 141) | def ssbg
method df_wg (line 148) | def df_wg
method k (line 151) | def k
method df_bg (line 155) | def df_bg
method n (line 159) | def n
method report_building (line 162) | def report_building(builder) # :nodoc:
FILE: lib/statsample/anova/twoway.rb
type Statsample (line 1) | module Statsample
type Anova (line 2) | module Anova
class TwoWay (line 8) | class TwoWay
method initialize (line 23) | def initialize(opts=Hash.new)
method f_a (line 72) | def f_a
method f_b (line 75) | def f_b
method f_axb (line 78) | def f_axb
method f_a_probability (line 81) | def f_a_probability
method f_b_probability (line 84) | def f_b_probability
method f_axb_probability (line 87) | def f_axb_probability
method report_building (line 92) | def report_building(builder) #:nodoc:
method report_building_table (line 97) | def report_building_table(builder) #:nodoc:
class TwoWayWithVectors (line 115) | class TwoWayWithVectors < TwoWay
method initialize (line 122) | def initialize(opts=Hash.new)
method levene (line 183) | def levene
method report_building (line 186) | def report_building(builder) #:nodoc:#
FILE: lib/statsample/bivariate.rb
type Statsample (line 2) | module Statsample
type Bivariate (line 8) | module Bivariate
function covariance (line 13) | def covariance(v1,v2)
function maximum_likehood_dichotomic (line 23) | def maximum_likehood_dichotomic(pred,real)
function covariance_slow (line 32) | def covariance_slow(v1,v2) # :nodoc:
function sum_of_squares (line 36) | def sum_of_squares(v1,v2)
function pearson (line 43) | def pearson(v1,v2)
function pearson_slow (line 52) | def pearson_slow(v1,v2) # :nodoc:
function t_pearson (line 61) | def t_pearson(v1,v2)
function t_r (line 73) | def t_r(r,size)
function prop_pearson (line 83) | def prop_pearson(t, size, tails=:both)
function prediction_pairwise (line 105) | def prediction_pairwise(vars,cases)
function prediction_optimized (line 111) | def prediction_optimized(vars,cases)
function residuals (line 117) | def residuals(from,del)
function partial_correlation (line 132) | def partial_correlation(v1,v2,control)
function covariance_matrix_optimized (line 141) | def covariance_matrix_optimized(ds)
function covariance_matrix (line 155) | def covariance_matrix(ds)
function covariance_matrix_pairwise (line 169) | def covariance_matrix_pairwise(ds)
function correlation_matrix (line 191) | def correlation_matrix(ds)
function correlation_matrix_optimized (line 203) | def correlation_matrix_optimized(ds)
function correlation_matrix_pairwise (line 213) | def correlation_matrix_pairwise(ds)
function n_valid_matrix (line 233) | def n_valid_matrix(ds)
function correlation_probability_matrix (line 247) | def correlation_probability_matrix(ds, tails=:both)
function spearman (line 258) | def spearman(v1,v2)
function point_biserial (line 265) | def point_biserial(dichotomous,continous)
function tau_a (line 276) | def tau_a(v1,v2)
function tau_b (line 295) | def tau_b(matrix)
function gamma (line 305) | def gamma(matrix)
function pairs (line 310) | def pairs(matrix)
function ordered_pairs (line 351) | def ordered_pairs(vector)
function min_n_valid (line 373) | def min_n_valid(ds)
FILE: lib/statsample/bivariate/pearson.rb
type Statsample (line 1) | module Statsample
type Bivariate (line 2) | module Bivariate
class Pearson (line 18) | class Pearson
method initialize (line 27) | def initialize(v1,v2,opts=Hash.new)
method r (line 40) | def r
method t (line 43) | def t
method probability (line 46) | def probability
method report_building (line 49) | def report_building(builder)
FILE: lib/statsample/codification.rb
type Statsample (line 3) | module Statsample
type Codification (line 30) | module Codification
function create_hash (line 35) | def create_hash(dataset, vectors, sep=Statsample::SPLIT_TOKEN)
function create_yaml (line 56) | def create_yaml(dataset, vectors, io=nil, sep=Statsample::SPLIT_TOKEN)
function create_excel (line 67) | def create_excel(dataset, vectors, filename, sep=Statsample::SPLIT_T...
function excel_to_recoded_hash (line 87) | def excel_to_recoded_hash(filename)
function inverse_hash (line 102) | def inverse_hash(h, sep=Statsample::SPLIT_TOKEN)
function dictionary (line 112) | def dictionary(h, sep=Statsample::SPLIT_TOKEN)
function recode_vector (line 116) | def recode_vector(v,h,sep=Statsample::SPLIT_TOKEN)
function recode_dataset_simple! (line 127) | def recode_dataset_simple!(dataset, dictionary_hash ,sep=Statsample:...
function recode_dataset_split! (line 130) | def recode_dataset_split!(dataset, dictionary_hash, sep=Statsample::...
function _recode_dataset (line 134) | def _recode_dataset(dataset, h , sep=Statsample::SPLIT_TOKEN, split=...
function verify (line 156) | def verify(h, v_names=nil,sep=Statsample::SPLIT_TOKEN,io=$>)
FILE: lib/statsample/converter/csv.rb
type Statsample (line 1) | module Statsample
class CSV (line 2) | class CSV < SpreadsheetBase
method read19 (line 12) | def read19(filename,ignore_lines=0,csv_opts=Hash.new)
method read (line 34) | def read(filename, empty=[''],ignore_lines=0,csv_opts=Hash.new)
method write (line 65) | def write(dataset,filename, convert_comma=false,*opts)
FILE: lib/statsample/converter/spss.rb
type Statsample (line 1) | module Statsample
type SPSS (line 2) | module SPSS
function tetrachoric_correlation_matrix (line 9) | def tetrachoric_correlation_matrix(ds)
FILE: lib/statsample/converters.rb
type Statsample (line 2) | module Statsample
type Database (line 4) | module Database
function read (line 13) | def read(dbh,query)
function insert (line 39) | def insert(ds, dbh, table)
function create_sql (line 54) | def create_sql(ds,table,charset="UTF8")
type Mondrian (line 64) | module Mondrian
function write (line 66) | def write(dataset,filename)
class SpreadsheetBase (line 77) | class SpreadsheetBase
method extract_fields (line 79) | def extract_fields(row)
method process_row (line 92) | def process_row(row,empty)
method convert_to_scale_and_date (line 109) | def convert_to_scale_and_date(ds,fields)
class PlainText (line 121) | class PlainText < SpreadsheetBase
method read (line 123) | def read(filename, fields)
class Excel (line 140) | class Excel < SpreadsheetBase
method write (line 144) | def write(dataset,filename)
method preprocess_row (line 162) | def preprocess_row(row, dates)
method read (line 185) | def read(filename, opts=Hash.new)
type Mx (line 245) | module Mx
function write (line 247) | def write(dataset,filename,type=:covariance)
type GGobi (line 282) | module GGobi
function write (line 284) | def write(dataset,filename,opt={})
function out (line 289) | def out(dataset,opt={})
function values_definition (line 332) | def values_definition(c,missing)
function variable_definition (line 347) | def variable_definition(carrier,v,name,nickname=nil)
FILE: lib/statsample/crosstab.rb
type Statsample (line 1) | module Statsample
class Crosstab (line 6) | class Crosstab
method initialize (line 10) | def initialize(v1, v2, opts=Hash.new)
method rows_names (line 24) | def rows_names
method cols_names (line 27) | def cols_names
method rows_total (line 30) | def rows_total
method cols_total (line 33) | def cols_total
method frequencies (line 37) | def frequencies
method to_matrix (line 46) | def to_matrix
method frequencies_by_row (line 54) | def frequencies_by_row
method frequencies_by_col (line 61) | def frequencies_by_col
method chi_square (line 69) | def chi_square
method matrix_expected (line 74) | def matrix_expected
method cols_empty_hash (line 87) | def cols_empty_hash
method report_building (line 90) | def report_building(builder)
method table_percentage (line 138) | def table_percentage(generator,type)
FILE: lib/statsample/dataset.rb
class Hash (line 3) | class Hash
method to_dataset (line 5) | def to_dataset(*args)
class Array (line 10) | class Array
method prefix (line 11) | def prefix(s) # :nodoc:
method suffix (line 14) | def suffix(s) # :nodoc:
type Statsample (line 19) | module Statsample
class DatasetException (line 20) | class DatasetException < RuntimeError # :nodoc:
method initialize (line 22) | def initialize(ds,e)
method to_s (line 26) | def to_s
class Dataset (line 59) | class Dataset
method crosstab_by_asignation (line 92) | def self.crosstab_by_asignation(rows,columns,values)
method has_missing_data? (line 119) | def has_missing_data?
method nest (line 128) | def nest(*tree_keys,&block)
method initialize (line 158) | def initialize(vectors={}, fields=[])
method dup_only_valid (line 183) | def dup_only_valid(*fields_to_include)
method dup (line 211) | def dup(*fields_to_include)
method from_to (line 230) | def from_to(from,to)
method clone_only_valid (line 242) | def clone_only_valid(*fields_to_include)
method clone (line 257) | def clone(*fields_to_include)
method dup_empty (line 275) | def dup_empty
method merge (line 287) | def merge(other_ds)
method join (line 308) | def join(other_ds,fields_1=[],fields_2=[],type=:left)
method standarize (line 347) | def standarize
method collect_matrix (line 358) | def collect_matrix
method == (line 370) | def ==(d2)
method col (line 376) | def col(c)
method add_vector (line 383) | def add_vector(name, vector)
method has_vector? (line 392) | def has_vector? (v)
method bootstrap (line 399) | def bootstrap(n=nil)
method add_case_array (line 413) | def add_case_array(v)
method add_case (line 424) | def add_case(v,uvd=true)
method update_valid_data (line 445) | def update_valid_data
method delete_vector (line 451) | def delete_vector(*args)
method add_vectors_by_split_recode (line 463) | def add_vectors_by_split_recode(name_,join='-',sep=Statsample::SPLIT...
method add_vectors_by_split (line 473) | def add_vectors_by_split(name,join='-',sep=Statsample::SPLIT_TOKEN)
method vector_by_calculation (line 480) | def vector_by_calculation(type=:scale)
method vector_sum (line 489) | def vector_sum(fields=nil)
method check_fields (line 502) | def check_fields(fields)
method vector_missing_values (line 509) | def vector_missing_values(fields=nil)
method vector_count_characters (line 517) | def vector_count_characters(fields=nil)
method vector_mean (line 529) | def vector_mean(fields=nil, max_invalid=0)
method check_length (line 555) | def check_length # :nodoc:
method each_vector (line 570) | def each_vector # :yield: |key, vector|
method case_as_hash (line 575) | def case_as_hash(c) # :nodoc:
method case_as_hash (line 580) | def case_as_hash(i)
method case_as_array (line 586) | def case_as_array(c) # :nodoc:
method case_as_array (line 591) | def case_as_array(i)
method _case_as_hash (line 595) | def _case_as_hash(c) # :nodoc:
method _case_as_array (line 598) | def _case_as_array(c) # :nodoc:
method each (line 603) | def each
method each_with_index (line 618) | def each_with_index # :yield: |case, i|
method each_array_with_nils (line 633) | def each_array_with_nils
method each_array (line 647) | def each_array
method fields= (line 657) | def fields=(f)
method check_order (line 663) | def check_order #:nodoc:
method [] (line 670) | def[](i)
method collect (line 683) | def collect(type=:scale)
method collect_with_index (line 691) | def collect_with_index(type=:scale)
method recode! (line 699) | def recode!(vector_name)
method crosstab (line 706) | def crosstab(v1,v2,opts={})
method []= (line 709) | def[]=(i,v)
method to_matrix (line 719) | def to_matrix
method clear_gsl (line 728) | def clear_gsl
method to_gsl (line 732) | def to_gsl
method correlation_matrix (line 749) | def correlation_matrix(fields=nil)
method covariance_matrix (line 759) | def covariance_matrix(fields=nil)
method filter (line 769) | def filter
method filter_field (line 780) | def filter_field(field)
method to_multiset_by_split (line 792) | def to_multiset_by_split(*fields)
method to_multiset_by_split_one_field (line 802) | def to_multiset_by_split_one_field(field)
method to_multiset_by_split_multiple_fields (line 823) | def to_multiset_by_split_multiple_fields(*fields)
method compute (line 869) | def compute(text)
method verify (line 895) | def verify(*tests)
method to_s (line 918) | def to_s
method inspect (line 921) | def inspect
method one_to_many (line 952) | def one_to_many(parent_fields, pattern)
method report_building (line 995) | def report_building(b)
FILE: lib/statsample/dominanceanalysis.rb
type Statsample (line 1) | module Statsample
class DominanceAnalysis (line 58) | class DominanceAnalysis
method predictor_name (line 88) | def self.predictor_name(variable)
method initialize (line 102) | def initialize(input, dependent, opts=Hash.new)
method compute (line 138) | def compute
method models (line 142) | def models
method models_data (line 149) | def models_data
method create_models (line 155) | def create_models
method fill_models (line 175) | def fill_models
method dominance_for_nil_model (line 187) | def dominance_for_nil_model(i,j)
method total_dominance_pairwise (line 197) | def total_dominance_pairwise(i,j)
method conditional_dominance_pairwise (line 218) | def conditional_dominance_pairwise(i,j)
method general_dominance_pairwise (line 237) | def general_dominance_pairwise(i,j)
method pairs (line 247) | def pairs
method total_dominance (line 250) | def total_dominance
method conditional_dominance (line 255) | def conditional_dominance
method general_dominance (line 260) | def general_dominance
method md (line 266) | def md(m)
method md_k (line 270) | def md_k(k)
method get_averages (line 280) | def get_averages(averages)
method average_k (line 286) | def average_k(k)
method general_averages (line 297) | def general_averages
method report_building (line 312) | def report_building(g)
class ModelData (line 363) | class ModelData # :nodoc:
method initialize (line 365) | def initialize(independent, data, da)
method add_contribution (line 382) | def add_contribution(f, v)
method r2 (line 385) | def r2
method name (line 388) | def name
method add_table_row (line 393) | def add_table_row
method summary (line 409) | def summary
FILE: lib/statsample/dominanceanalysis/bootstrap.rb
type Statsample (line 1) | module Statsample
class DominanceAnalysis (line 2) | class DominanceAnalysis
class Bootstrap (line 67) | class Bootstrap
method initialize (line 97) | def initialize(ds,y_var, opts=Hash.new)
method da (line 123) | def da
method bootstrap (line 135) | def bootstrap(number_samples,n=nil)
method create_samples_pairs (line 156) | def create_samples_pairs
method t (line 171) | def t
method report_building (line 174) | def report_building(builder) # :nodoc:
method summary_pairs (line 220) | def summary_pairs(pair,std,ttd)
method f (line 228) | def f(v,n=3)
FILE: lib/statsample/factor.rb
type Statsample (line 7) | module Statsample
type Factor (line 28) | module Factor
function anti_image_covariance_matrix (line 36) | def self.anti_image_covariance_matrix(matrix)
function anti_image_correlation_matrix (line 43) | def self.anti_image_correlation_matrix(matrix)
function kmo (line 63) | def self.kmo(matrix)
function kmo_univariate (line 79) | def self.kmo_univariate(matrix, var)
FILE: lib/statsample/factor/map.rb
type Statsample (line 1) | module Statsample
type Factor (line 2) | module Factor
class MAP (line 39) | class MAP
method with_dataset (line 53) | def self.with_dataset(ds,opts=Hash.new)
method initialize (line 56) | def initialize(matrix, opts=Hash.new)
method compute (line 65) | def compute
method report_building (line 103) | def report_building(g) #:nodoc:
FILE: lib/statsample/factor/parallelanalysis.rb
type Statsample (line 1) | module Statsample
type Factor (line 2) | module Factor
class ParallelAnalysis (line 23) | class ParallelAnalysis
method with_random_data (line 24) | def self.with_random_data(cases,vars,opts=Hash.new)
method initialize (line 62) | def initialize(ds, opts=Hash.new)
method number_of_factors (line 83) | def number_of_factors
method report_building (line 94) | def report_building(g) #:nodoc:
method compute (line 122) | def compute
FILE: lib/statsample/factor/pca.rb
type Statsample (line 2) | module Statsample
type Factor (line 3) | module Factor
class PCA (line 37) | class PCA
method initialize (line 54) | def initialize(matrix, opts=Hash.new)
method rotation (line 86) | def rotation
method total_eigenvalues (line 89) | def total_eigenvalues
method create_centered_ds (line 92) | def create_centered_ds
method feature_matrix (line 104) | def feature_matrix(m=nil)
method principal_components (line 126) | def principal_components(input, m=nil)
method component_matrix (line 143) | def component_matrix(m=nil)
method component_matrix_covariance (line 149) | def component_matrix_covariance(m=nil)
method component_matrix_correlation (line 168) | def component_matrix_correlation(m=nil)
method communalities (line 186) | def communalities(m=nil)
method eigenvalues (line 200) | def eigenvalues
method eigenvectors (line 203) | def eigenvectors
method calculate_eigenpairs (line 208) | def calculate_eigenpairs
method report_building (line 213) | def report_building(builder) # :nodoc:
FILE: lib/statsample/factor/principalaxis.rb
type Statsample (line 1) | module Statsample
type Factor (line 2) | module Factor
class PrincipalAxis (line 28) | class PrincipalAxis
method initialize (line 65) | def initialize(matrix, opts=Hash.new)
method communalities (line 97) | def communalities(m=nil)
method component_matrix (line 105) | def component_matrix(m=nil)
method iterate (line 112) | def iterate(m=nil)
method initial_communalities (line 154) | def initial_communalities
method separate_matrices (line 178) | def self.separate_matrices(matrix, y)
method report_building (line 197) | def report_building(generator)
FILE: lib/statsample/factor/rotation.rb
type Statsample (line 1) | module Statsample
type Factor (line 2) | module Factor
class Rotation (line 20) | class Rotation
method initialize (line 34) | def initialize(matrix, opts=Hash.new)
method report_building (line 49) | def report_building(g)
method compute (line 57) | def compute
method iterate (line 61) | def iterate
class Varimax (line 162) | class Varimax < Rotation
method x (line 163) | def x(a,b,c,d)
method y (line 166) | def y(a,b,c,d)
method rotation_name (line 169) | def rotation_name
class Equimax (line 173) | class Equimax < Rotation
method x (line 174) | def x(a,b,c,d)
method y (line 177) | def y(a,b,c,d)
method rotation_name (line 180) | def rotation_name
class Quartimax (line 185) | class Quartimax < Rotation
method x (line 186) | def x(a,b,c,d)
method y (line 189) | def y(a,b,c,d)
method rotation_name (line 192) | def rotation_name
FILE: lib/statsample/graph.rb
type Statsample (line 4) | module Statsample
type Graph (line 9) | module Graph
FILE: lib/statsample/graph/boxplot.rb
type Statsample (line 2) | module Statsample
type Graph (line 3) | module Graph
class Boxplot (line 21) | class Boxplot
method initialize (line 58) | def initialize(opts=Hash.new)
method rubyvis_panel (line 80) | def rubyvis_panel # :nodoc:
method to_svg (line 231) | def to_svg
method report_building (line 236) | def report_building(builder) # :nodoc:
FILE: lib/statsample/graph/histogram.rb
type Statsample (line 2) | module Statsample
type Graph (line 3) | module Graph
class Histogram (line 17) | class Histogram
method initialize (line 47) | def initialize(data, opts=Hash.new)
method pre_vis (line 68) | def pre_vis # :nodoc:
method rubyvis_normal_distribution (line 80) | def rubyvis_normal_distribution(pan)
method rubyvis_panel (line 106) | def rubyvis_panel # :nodoc:
method to_svg (line 176) | def to_svg
method report_building (line 181) | def report_building(builder) # :nodoc:
method report_building_text (line 186) | def report_building_text(generator)
FILE: lib/statsample/graph/scatterplot.rb
type Statsample (line 2) | module Statsample
type Graph (line 3) | module Graph
class Scatterplot (line 23) | class Scatterplot
method initialize (line 68) | def initialize(v1,v2,opts=Hash.new)
method add_line_median (line 95) | def add_line_median(vis) # :nodoc:
method rubyvis_panel (line 120) | def rubyvis_panel # :nodoc:
method to_svg (line 199) | def to_svg
method report_building (line 204) | def report_building(builder) # :nodoc:
FILE: lib/statsample/histogram.rb
type Statsample (line 1) | module Statsample
class Histogram (line 40) | class Histogram
method alloc (line 44) | def alloc(n_bins, range=nil, opts=Hash.new)
method alloc_uniform (line 50) | def alloc_uniform(n_bins, p1=nil,p2=nil)
method initialize (line 67) | def initialize(p1, min_max=false, opts=Hash.new)
method bins (line 90) | def bins
method increment (line 94) | def increment(x, w=1)
method set_ranges (line 106) | def set_ranges(range)
method get_range (line 110) | def get_range(i)
method max (line 113) | def max
method min (line 116) | def min
method max_val (line 119) | def max_val
method min_val (line 122) | def min_val
method each (line 125) | def each
method estimated_variance (line 132) | def estimated_variance
method estimated_standard_deviation (line 141) | def estimated_standard_deviation
method estimated_mean (line 144) | def estimated_mean
method sum (line 155) | def sum(start=nil,_end=nil)
method report_building (line 160) | def report_building(generator)
method report_building_text (line 164) | def report_building_text(generator)
FILE: lib/statsample/matrix.rb
class ::Vector (line 1) | class ::Vector
method to_matrix (line 2) | def to_matrix
method to_vector (line 5) | def to_vector
class ::Matrix (line 9) | class ::Matrix
method to_matrix (line 10) | def to_matrix
method to_dataset (line 13) | def to_dataset
method eigenpairs (line 33) | def eigenpairs
method eigenvalues (line 38) | def eigenvalues
method eigenvectors (line 41) | def eigenvectors
method eigenvectors_matrix (line 44) | def eigenvectors_matrix
method to_gsl (line 52) | def to_gsl
type GSL (line 61) | module GSL
class Vector (line 62) | class Vector
class Col (line 63) | class Col
method to_matrix (line 64) | def to_matrix
method to_ary (line 67) | def to_ary
method to_gsl (line 70) | def to_gsl
class Matrix (line 75) | class Matrix
method to_gsl (line 76) | def to_gsl
method to_dataset (line 80) | def to_dataset
method row_size (line 95) | def row_size
method column_size (line 98) | def column_size
method determinant (line 101) | def determinant
method inverse (line 104) | def inverse
method eigenvalues (line 107) | def eigenvalues
method eigenvectors (line 110) | def eigenvectors
method mssq (line 115) | def mssq
method eigenvectors_matrix (line 121) | def eigenvectors_matrix
method eigenpairs (line 126) | def eigenpairs
method square? (line 137) | def square?
method to_matrix (line 140) | def to_matrix
method total_sum (line 146) | def total_sum
type Statsample (line 158) | module Statsample
type NamedMatrix (line 160) | module NamedMatrix
function fields (line 163) | def fields
function fields= (line 167) | def fields=(v)
function fields_x= (line 172) | def fields_x=(v)
function fields_y= (line 176) | def fields_y=(v)
function fields_x (line 180) | def fields_x
function fields_y (line 183) | def fields_y
function name (line 187) | def name
function name= (line 190) | def name=(v)
function get_new_name (line 193) | def get_new_name
type CovariateMatrix (line 205) | module CovariateMatrix
function _type (line 210) | def _type
function _type= (line 222) | def _type=(t)
function correlation (line 225) | def correlation
function variance (line 249) | def variance(k)
function get_new_name (line 253) | def get_new_name
function submatrix (line 275) | def submatrix(rows,columns=nil)
function report_building (line 302) | def report_building(generator)
FILE: lib/statsample/multiset.rb
type Statsample (line 1) | module Statsample
class Multiset (line 5) | class Multiset
method initialize (line 12) | def initialize(fields)
method new_empty_vectors (line 16) | def self.new_empty_vectors(fields,ds_names)
method union (line 25) | def union(&block)
method datasets_names (line 53) | def datasets_names
method n_datasets (line 56) | def n_datasets
method add_dataset (line 59) | def add_dataset(key,ds)
method sum_field (line 66) | def sum_field(field)
method collect_vector (line 74) | def collect_vector(field)
method each_vector (line 80) | def each_vector(field)
method [] (line 85) | def[](i)
method each (line 88) | def each(&block)
class StratifiedSample (line 95) | class StratifiedSample
method mean (line 98) | def mean(*vectors)
method standard_error_ksd_wr (line 107) | def standard_error_ksd_wr(es)
method variance_ksd_wr (line 117) | def variance_ksd_wr(es)
method calculate_n_total (line 120) | def calculate_n_total(es)
method variance_ksd_wor (line 125) | def variance_ksd_wor(es)
method standard_error_ksd_wor (line 132) | def standard_error_ksd_wor(es)
method variance_esd_wor (line 138) | def variance_esd_wor(es)
method standard_error_esd_wor (line 148) | def standard_error_esd_wor(es)
method variance_esd_wr (line 152) | def variance_esd_wr(es)
method standard_error_esd_wr (line 160) | def standard_error_esd_wr(es)
method proportion_variance_ksd_wor (line 164) | def proportion_variance_ksd_wor(es)
method proportion_sd_ksd_wor (line 171) | def proportion_sd_ksd_wor(es)
method proportion_sd_ksd_wr (line 176) | def proportion_sd_ksd_wr(es)
method proportion_variance_ksd_wr (line 184) | def proportion_variance_ksd_wr(es)
method proportion_variance_esd_wor (line 188) | def proportion_variance_esd_wor(es)
method proportion_sd_esd_wor (line 197) | def proportion_sd_esd_wor(es)
method initialize (line 202) | def initialize(ms,strata_sizes)
method strata_number (line 212) | def strata_number
method population_size (line 217) | def population_size
method sample_size (line 221) | def sample_size
method stratum_size (line 225) | def stratum_size(h)
method vectors_by_field (line 228) | def vectors_by_field(field)
method proportion (line 234) | def proportion(field, v=1)
method stratum_ponderation (line 241) | def stratum_ponderation(h)
method mean (line 247) | def mean(field)
method standard_error_wor (line 254) | def standard_error_wor(field)
method standard_error_wor_2 (line 265) | def standard_error_wor_2(field)
method standard_error_wr (line 273) | def standard_error_wr(field)
method proportion_sd_esd_wor (line 280) | def proportion_sd_esd_wor(field,v=1)
method proportion_standard_error (line 288) | def proportion_standard_error(field,v=1)
method variance_pst (line 298) | def variance_pst(field,v=1)
FILE: lib/statsample/regression.rb
type Statsample (line 8) | module Statsample
type Regression (line 20) | module Regression
function simple (line 37) | def self.simple(x,y)
function multiple (line 53) | def self.multiple(ds,y_var, opts=Hash.new)
FILE: lib/statsample/regression/multiple.rb
type Statsample (line 2) | module Statsample
type Regression (line 3) | module Regression
type Multiple (line 42) | module Multiple
function r2_from_matrices (line 44) | def self.r2_from_matrices(rxx,rxy)
class MultipleDependent (line 49) | class MultipleDependent
method significance (line 50) | def significance
method initialize (line 53) | def initialize(matrix,y_var, opts=Hash.new)
method r2yx (line 69) | def r2yx
method syyx (line 73) | def syyx
method r2yx_covariance (line 76) | def r2yx_covariance
method vxy (line 80) | def vxy
method p2yx (line 83) | def p2yx
FILE: lib/statsample/regression/multiple/alglibengine.rb
type Statsample (line 2) | module Statsample
type Regression (line 3) | module Regression
type Multiple (line 4) | module Multiple
class AlglibEngine (line 19) | class AlglibEngine < BaseEngine
method initialize (line 20) | def initialize(ds,y_var, opts=Hash.new)
method _dump (line 44) | def _dump(i)
method _load (line 47) | def self._load(data)
method coeffs (line 52) | def coeffs
method matrix_resolution (line 57) | def matrix_resolution
method r2 (line 67) | def r2
method r (line 70) | def r
method sst (line 73) | def sst
method constant (line 76) | def constant
method standarized_coeffs (line 79) | def standarized_coeffs
method lr_s (line 83) | def lr_s
method build_standarized (line 89) | def build_standarized
method process (line 100) | def process(v)
method process_s (line 103) | def process_s(v)
method standarized_residuals (line 107) | def standarized_residuals
FILE: lib/statsample/regression/multiple/baseengine.rb
type Statsample (line 1) | module Statsample
type Regression (line 2) | module Regression
type Multiple (line 3) | module Multiple
class BaseEngine (line 5) | class BaseEngine
method univariate? (line 17) | def self.univariate?
method initialize (line 20) | def initialize(ds, y_var, opts = Hash.new)
method anova (line 39) | def anova
method se_estimate (line 43) | def se_estimate
method predicted (line 47) | def predicted
method standarized_predicted (line 59) | def standarized_predicted
method residuals (line 63) | def residuals
method r (line 75) | def r
method sst (line 79) | def sst
method r2_adjusted (line 87) | def r2_adjusted
method ssr (line 91) | def ssr
method sse (line 95) | def sse
method coeffs_t (line 99) | def coeffs_t
method msr (line 108) | def msr
method mse (line 112) | def mse
method df_r (line 116) | def df_r
method df_e (line 120) | def df_e
method f (line 124) | def f
method probability (line 128) | def probability
method tolerance (line 133) | def tolerance(var)
method coeffs_tolerances (line 142) | def coeffs_tolerances
method coeffs_se (line 149) | def coeffs_se
method se_r2 (line 159) | def se_r2
method estimated_variance_covariance_matrix (line 165) | def estimated_variance_covariance_matrix
method constant_t (line 178) | def constant_t
method constant_se (line 182) | def constant_se
method report_building (line 185) | def report_building(b)
method assign_names (line 212) | def assign_names(c)
method ssr_direct (line 222) | def ssr_direct
method sse_direct (line 237) | def sse_direct
method process (line 240) | def process(v)
FILE: lib/statsample/regression/multiple/gslengine.rb
type Statsample (line 2) | module Statsample
type Regression (line 3) | module Regression
type Multiple (line 4) | module Multiple
class GslEngine (line 19) | class GslEngine < BaseEngine
method initialize (line 20) | def initialize(ds,y_var, opts=Hash.new)
method _dump (line 55) | def _dump(i)
method _load (line 58) | def self._load(data)
method coeffs (line 63) | def coeffs
method matrix_resolution (line 68) | def matrix_resolution
method r2 (line 77) | def r2
method r (line 80) | def r
method sst (line 83) | def sst
method constant (line 86) | def constant
method standarized_coeffs (line 89) | def standarized_coeffs
method lr_s (line 93) | def lr_s
method build_standarized (line 99) | def build_standarized
method process_s (line 103) | def process_s(v)
method standarized_residuals (line 107) | def standarized_residuals
method coeffs_se (line 116) | def coeffs_se
FILE: lib/statsample/regression/multiple/matrixengine.rb
type Statsample (line 1) | module Statsample
type Regression (line 2) | module Regression
type Multiple (line 3) | module Multiple
class MatrixEngine (line 18) | class MatrixEngine < BaseEngine
method initialize (line 36) | def initialize(matrix,y_var, opts=Hash.new)
method cases (line 100) | def cases
method r2 (line 110) | def r2
method r (line 114) | def r
method constant (line 118) | def constant
method coeffs (line 123) | def coeffs
method standarized_coeffs (line 128) | def standarized_coeffs
method sst (line 132) | def sst
method df_r (line 137) | def df_r
method df_e (line 141) | def df_e
method tolerance (line 149) | def tolerance(var)
method coeffs_se (line 161) | def coeffs_se
method constant_t (line 170) | def constant_t
method constant_se (line 178) | def constant_se
FILE: lib/statsample/regression/multiple/rubyengine.rb
type Statsample (line 1) | module Statsample
type Regression (line 2) | module Regression
type Multiple (line 3) | module Multiple
class RubyEngine (line 18) | class RubyEngine < MatrixEngine
method initialize (line 19) | def initialize(ds,y_var, opts=Hash.new)
method set_dep_columns (line 40) | def set_dep_columns
method fix_with_mean (line 47) | def fix_with_mean
method fix_with_regression (line 62) | def fix_with_regression
method constant_se (line 84) | def constant_se
FILE: lib/statsample/regression/simple.rb
type Statsample (line 1) | module Statsample
type Regression (line 2) | module Regression
class Simple (line 10) | class Simple
method initialize (line 15) | def initialize(init_method, *argv)
method y (line 22) | def y(val_x)
method x (line 27) | def x(val_y)
method sse (line 31) | def sse
method standard_error (line 35) | def standard_error
method ssr (line 39) | def ssr
method sst (line 47) | def sst
method r (line 51) | def r
method r2 (line 55) | def r2
method new_from_gsl (line 62) | def new_from_gsl(ar)
method new_from_vectors (line 66) | def new_from_vectors(vx,vy, opts=Hash.new)
method new_from_dataset (line 70) | def new_from_dataset(ds,x,y, opts=Hash.new)
method init_vectors (line 74) | def init_vectors(vx,vy, opts=Hash.new)
method init_gsl (line 97) | def init_gsl(a,b,cov00, cov01, covx1, chisq, status)
method report_building (line 106) | def report_building(gen)
FILE: lib/statsample/reliability.rb
type Statsample (line 1) | module Statsample
type Reliability (line 2) | module Reliability
function cronbach_alpha (line 6) | def cronbach_alpha(ods)
function cronbach_alpha_standarized (line 21) | def cronbach_alpha_standarized(ods)
function spearman_brown_prophecy (line 36) | def spearman_brown_prophecy(r,n)
function n_for_desired_reliability (line 45) | def n_for_desired_reliability(r,r_d,n=1)
function cronbach_alpha_from_n_s2_cov (line 53) | def cronbach_alpha_from_n_s2_cov(n,s2,cov)
function cronbach_alpha_from_covariance_matrix (line 57) | def cronbach_alpha_from_covariance_matrix(cov)
function n_for_desired_alpha (line 65) | def n_for_desired_alpha(alpha,s2,cov)
function alpha_first_derivative (line 97) | def alpha_first_derivative(n,sx,sxy)
function alfa_second_derivative (line 106) | def alfa_second_derivative(n,sx,sxy)
class ItemCharacteristicCurve (line 110) | class ItemCharacteristicCurve
method initialize (line 112) | def initialize (ds, vector_total=nil)
method process (line 121) | def process
method curve_field (line 137) | def curve_field(field, item)
FILE: lib/statsample/reliability/icc.rb
type Statsample (line 1) | module Statsample
type Reliability (line 2) | module Reliability
class ICC (line 23) | class ICC
method initialize (line 98) | def initialize(ds, opts=Hash.new)
method type= (line 111) | def type=(v)
method compute (line 196) | def compute
method icc_1_f (line 240) | def icc_1_f(rho=0.0)
method icc_1_k_f (line 246) | def icc_1_k_f(rho=0)
method icc_c_1_f (line 252) | def icc_c_1_f(rho=0)
method icc_c_k_f (line 257) | def icc_c_k_f(rho=0)
method v (line 263) | def v(a,b)
method a (line 266) | def a(rho)
method b (line 269) | def b(rho)
method c (line 272) | def c(rho)
method d (line 275) | def d(rho)
method icc_a_1_f (line 279) | def icc_a_1_f(rho=0)
method icc_a_k_f (line 290) | def icc_a_k_f(rho=0)
method icc_1_f_shrout (line 307) | def icc_1_f_shrout
method icc_1_1_ci (line 312) | def icc_1_1_ci(alpha=0.05)
method icc_1_k_ci (line 322) | def icc_1_k_ci(alpha=0.05)
method icc_2_f (line 330) | def icc_2_f
method icc_2_1_fs (line 338) | def icc_2_1_fs(pp,alpha=0.05)
method icc_2_1_ci (line 350) | def icc_2_1_ci(alpha=0.05)
method icc_2_1_ci_mcgraw (line 356) | def icc_2_1_ci_mcgraw(alpha=0.05)
method icc_2_k_ci (line 363) | def icc_2_k_ci(alpha=0.05)
method icc_2_k_ci_mcgraw (line 367) | def icc_2_k_ci_mcgraw(alpha=0.05)
method icc_2_k_ci_shrout (line 375) | def icc_2_k_ci_shrout(alpha=0.05)
method icc_3_f (line 381) | def icc_3_f
method icc_3_1_ci (line 385) | def icc_3_1_ci(alpha=0.05)
method icc_3_k_ci (line 392) | def icc_3_k_ci(alpha=0.05)
method icc_c_k_ci (line 399) | def icc_c_k_ci(alpha=0.05)
method report_building (line 405) | def report_building(b)
FILE: lib/statsample/reliability/multiscaleanalysis.rb
type Statsample (line 1) | module Statsample
type Reliability (line 2) | module Reliability
class MultiScaleAnalysis (line 23) | class MultiScaleAnalysis
method initialize (line 64) | def initialize(opts=Hash.new, &block)
method scale (line 93) | def scale(code, ds=nil, opts=nil)
method delete_scale (line 103) | def delete_scale(code)
method pca (line 109) | def pca(opts=nil)
method map (line 115) | def map(opts=nil)
method principal_axis_analysis (line 121) | def principal_axis_analysis(opts=nil)
method dataset_from_scales (line 125) | def dataset_from_scales
method parallel_analysis (line 134) | def parallel_analysis(opts=nil)
method correlation_matrix (line 140) | def correlation_matrix
method report_building (line 143) | def report_building(b) # :nodoc:
FILE: lib/statsample/reliability/scaleanalysis.rb
type Statsample (line 1) | module Statsample
type Reliability (line 2) | module Reliability
class ScaleAnalysis (line 13) | class ScaleAnalysis
method initialize (line 18) | def initialize(ds, opts=Hash.new)
method item_characteristic_curve (line 63) | def item_characteristic_curve
method item_total_correlation (line 89) | def item_total_correlation
method mean_rpb (line 99) | def mean_rpb
method item_statistics (line 102) | def item_statistics
method item_difficulty_analysis (line 111) | def item_difficulty_analysis
method stats_if_deleted (line 130) | def stats_if_deleted
method stats_if_deleted_intern (line 134) | def stats_if_deleted_intern # :nodoc:
method report_building (line 151) | def report_building(builder) #:nodoc:
FILE: lib/statsample/reliability/skillscaleanalysis.rb
type Statsample (line 1) | module Statsample
type Reliability (line 2) | module Reliability
class SkillScaleAnalysis (line 14) | class SkillScaleAnalysis
method initialize (line 19) | def initialize(ds,key,opts=Hash.new)
method corrected_dataset_minimal (line 32) | def corrected_dataset_minimal
method vector_sum (line 43) | def vector_sum
method vector_mean (line 46) | def vector_mean
method scale_analysis (line 49) | def scale_analysis
method corrected_dataset (line 54) | def corrected_dataset
method report_building (line 77) | def report_building(builder)
FILE: lib/statsample/resample.rb
type Statsample (line 1) | module Statsample
type Resample (line 2) | module Resample
function repeat_and_save (line 4) | def repeat_and_save(times,&action)
function generate (line 8) | def generate (size,low,upper)
FILE: lib/statsample/rserve_extension.rb
type Statsample (line 4) | module Statsample
class Vector (line 5) | class Vector
method to_REXP (line 6) | def to_REXP
class Dataset (line 10) | class Dataset
method to_REXP (line 11) | def to_REXP
FILE: lib/statsample/shorthand.rb
class Object (line 1) | class Object
method ss_analysis (line 3) | def ss_analysis(*args,&block)
type Statsample (line 8) | module Statsample
type Shorthand (line 10) | module Shorthand
function read_with_cache (line 14) | def read_with_cache(klass, filename,opts=Hash.new, cache=true)
function read_excel (line 25) | def read_excel(filename, opts=Hash.new, cache=true)
function read_csv (line 31) | def read_csv
function names (line 36) | def names(ds)
function cor (line 40) | def cor(ds)
function cov (line 44) | def cov(ds)
function vector (line 49) | def vector(*args)
function rnorm (line 53) | def rnorm(n,mean=0,sd=1)
function dataset (line 59) | def dataset(vectors=Hash.new)
function boxplot (line 65) | def boxplot(*args)
function histogram (line 69) | def histogram(*args)
function scatterplot (line 74) | def scatterplot(*args)
function levene (line 78) | def levene(*args)
function principal_axis (line 81) | def principal_axis(*args)
function polychoric (line 85) | def polychoric(*args)
function tetrachoric (line 88) | def tetrachoric(*args)
function lr (line 95) | def lr(*args)
function pca (line 98) | def pca(ds,opts=Hash.new)
function dominance_analysis (line 101) | def dominance_analysis(*args)
function dominance_analysis_bootstrap (line 104) | def dominance_analysis_bootstrap(*args)
function scale_analysis (line 107) | def scale_analysis(*args)
function skill_scale_analysis (line 110) | def skill_scale_analysis(*args)
function multiscale_analysis (line 113) | def multiscale_analysis(*args,&block)
function test_u (line 116) | def test_u(*args)
FILE: lib/statsample/srs.rb
type Statsample (line 1) | module Statsample
type SRS (line 7) | module SRS
function fpc_var (line 20) | def fpc_var(sam,pop)
function fpc (line 24) | def fpc(sam,pop)
function qf (line 31) | def qf(sam , pop)
function estimation_n0 (line 35) | def estimation_n0(d,prop,margin=0.95)
function estimation_n (line 41) | def estimation_n(d,prop,n_pobl,margin=0.95)
function proportion_confidence_interval_t (line 50) | def proportion_confidence_interval_t(prop, n_sample, n_population, m...
function proportion_confidence_interval_z (line 57) | def proportion_confidence_interval_z(p, n_sample, n_population, marg...
function proportion_confidence_interval (line 64) | def proportion_confidence_interval(p, sam,pop , x)
function proportion_sd_kp_wr (line 72) | def proportion_sd_kp_wr(p, n_sample)
function proportion_sd_kp_wor (line 80) | def proportion_sd_kp_wor(p, sam, pop)
function proportion_sd_ep_wr (line 86) | def proportion_sd_ep_wr(p, n_sample)
function proportion_sd_ep_wor (line 93) | def proportion_sd_ep_wor(p, sam,pop)
function proportion_total_sd_kp_wor (line 102) | def proportion_total_sd_kp_wor(prop, sam, pop)
function proportion_total_sd_ep_wor (line 108) | def proportion_total_sd_ep_wor(prop, sam, pop)
function standard_error_ksd_wr (line 121) | def standard_error_ksd_wr(s, sam, pop)
function standard_error_ksd_wor (line 126) | def standard_error_ksd_wor(s,sam,pop)
function standard_error_esd_wor (line 135) | def standard_error_esd_wor(s,sam,pop)
function standard_error_total (line 144) | def standard_error_total(s,sam,pop)
function mean_confidence_interval_t (line 150) | def mean_confidence_interval_t(mean,s,n_sample,n_population,margin=0...
function mean_confidence_interval_z (line 156) | def mean_confidence_interval_z(mean,s,n_sample,n_population,margin=0...
function mean_confidence_interval (line 163) | def mean_confidence_interval(mean,s,n_sample,n_population,x)
FILE: lib/statsample/test.rb
type Statsample (line 1) | module Statsample
type Test (line 4) | module Test
function p_using_cdf (line 21) | def p_using_cdf(cdf, tails=:both)
function t_critical (line 36) | def t_critical(confidence_level, df)
function z_critical (line 40) | def z_critical(confidence_level)
function chi_square (line 47) | def chi_square(observed, expected=nil)
function u_mannwhitney (line 60) | def u_mannwhitney(v1, v2)
function t_one_sample (line 64) | def t_one_sample(vector, opts=Hash.new)
function t_two_samples_independent (line 68) | def t_two_samples_independent(v1,v2, opts=Hash.new)
function wilcoxon_signed_rank (line 72) | def wilcoxon_signed_rank(v1,v2,opts=Hash.new)
function levene (line 76) | def levene(input, opts=Hash.new)
FILE: lib/statsample/test/bartlettsphericity.rb
type Statsample (line 1) | module Statsample
type Test (line 2) | module Test
class BartlettSphericity (line 10) | class BartlettSphericity
method initialize (line 21) | def initialize(matrix,ncases)
method compute (line 32) | def compute
method probability (line 36) | def probability
method report_building (line 39) | def report_building(builder) # :nodoc:
FILE: lib/statsample/test/chisquare.rb
type Statsample (line 1) | module Statsample
type Test (line 2) | module Test
type ChiSquare (line 3) | module ChiSquare
class WithMatrix (line 4) | class WithMatrix
method initialize (line 7) | def initialize(observed, expected=nil)
method calculate_expected (line 14) | def calculate_expected
method to_f (line 22) | def to_f
method chi_square (line 25) | def chi_square
method probability (line 28) | def probability
method compute_chi (line 31) | def compute_chi
FILE: lib/statsample/test/f.rb
type Statsample (line 1) | module Statsample
type Test (line 2) | module Test
class F (line 5) | class F
method initialize (line 19) | def initialize(var_num, var_den, df_num, df_den, opts=Hash.new)
method f (line 33) | def f
method to_f (line 36) | def to_f
method probability (line 40) | def probability
method report_building (line 43) | def report_building(builder) #:nodoc:
FILE: lib/statsample/test/kolmogorovsmirnov.rb
type Statsample (line 1) | module Statsample
type Test (line 2) | module Test
class KolmogorovSmirnov (line 4) | class KolmogorovSmirnov
method initialize (line 13) | def initialize(d1,d2)
method calculate (line 25) | def calculate
method make_cdf (line 37) | def make_cdf(v)
class EmpiricDistribution (line 40) | class EmpiricDistribution
method initialize (line 41) | def initialize(data)
method each (line 47) | def each
method cdf (line 52) | def cdf(x)
FILE: lib/statsample/test/levene.rb
type Statsample (line 1) | module Statsample
type Test (line 2) | module Test
class Levene (line 21) | class Levene
method initialize (line 31) | def initialize(input, opts=Hash.new())
method f (line 44) | def f
method report_building (line 47) | def report_building(builder) # :nodoc:
method compute (line 50) | def compute
method probability (line 81) | def probability
FILE: lib/statsample/test/t.rb
type Statsample (line 1) | module Statsample
type Test (line 2) | module Test
class T (line 8) | class T
method one_sample (line 18) | def one_sample(x,u,s,n)
method two_sample_independent (line 30) | def two_sample_independent(x1, x2, s1, s2, n1, n2, equal_variance ...
method df_equal_variance (line 41) | def df_equal_variance(n1,n2)
method df_not_equal_variance (line 51) | def df_not_equal_variance(s1,s2,n1,n2)
method initialize (line 76) | def initialize(estimate, standard_error, df, opts=Hash.new)
method to_f (line 95) | def to_f
method probability (line 100) | def probability
method confidence_interval (line 104) | def confidence_interval(cl=nil)
method report_building (line 112) | def report_building(builder) #:nodoc:
method report_building_t (line 118) | def report_building_t(s)
class OneSample (line 140) | class OneSample
method initialize (line 160) | def initialize(vector, opts=Hash.new)
method t_object (line 171) | def t_object
method t (line 174) | def t
method probability (line 177) | def probability
method standard_error (line 180) | def standard_error
method confidence_interval (line 184) | def confidence_interval(cl=nil)
method report_building (line 188) | def report_building(b) # :nodoc:
class TwoSamplesIndependent (line 225) | class TwoSamplesIndependent
method initialize (line 258) | def initialize(v1, v2, opts=Hash.new)
method compute (line 268) | def compute
method d (line 282) | def d
method report_building (line 290) | def report_building(b) # :nodoc:
FILE: lib/statsample/test/umannwhitney.rb
type Statsample (line 1) | module Statsample
type Test (line 2) | module Test
class UMannWhitney (line 18) | class UMannWhitney
method u_sampling_distribution_as62 (line 31) | def self.u_sampling_distribution_as62(n1,n2)
method distribution_permutations (line 78) | def self.distribution_permutations(n1,n2)
method initialize (line 118) | def initialize(v1,v2, opts=Hash.new)
method report_building (line 147) | def report_building(generator) # :nodoc:
method probability_exact (line 162) | def probability_exact
method adjust_for_ties (line 174) | def adjust_for_ties(data)
method z (line 187) | def z
method probability_z (line 202) | def probability_z
FILE: lib/statsample/test/wilcoxonsignedrank.rb
type Statsample (line 1) | module Statsample
type Test (line 2) | module Test
class WilcoxonSignedRank (line 5) | class WilcoxonSignedRank
method initialize (line 15) | def initialize(v1,v2, opts=Hash.new)
method calculate (line 25) | def calculate
method report_building (line 42) | def report_building(generator) # :nodoc:
method z (line 53) | def z
method probability_z (line 60) | def probability_z
method probability_exact (line 65) | def probability_exact
FILE: lib/statsample/vector.rb
type Statsample::VectorShorthands (line 4) | module Statsample::VectorShorthands
function to_vector (line 7) | def to_vector(*args)
function to_scale (line 11) | def to_scale(*args)
class Array (line 16) | class Array
type GSL (line 21) | module GSL
class Vector (line 22) | class Vector
type Statsample (line 27) | module Statsample
class Vector (line 38) | class Vector
method initialize (line 73) | def initialize(data=[], type=:nominal, opts=Hash.new)
method [] (line 106) | def self.[](*args)
method new_scale (line 129) | def self.new_scale(n,val=nil, &block)
method dup (line 141) | def dup
method dup_empty (line 146) | def dup_empty
method check_type (line 152) | def check_type(t)
method check_type (line 156) | def check_type(t) #:nodoc:
method _check_type (line 162) | def _check_type(t) #:nodoc:
method vector_standarized_compute (line 166) | def vector_standarized_compute(m,sd) # :nodoc:
method vector_standarized (line 173) | def vector_standarized(use_population=false)
method vector_centered_compute (line 182) | def vector_centered_compute(m) #:nodoc:
method vector_centered (line 186) | def vector_centered
method vector_percentil (line 199) | def vector_percentil
method box_cox_transformation (line 206) | def box_cox_transformation(lambda) # :nodoc:
method == (line 223) | def ==(v2)
method _dump (line 228) | def _dump(i) # :nodoc:
method _load (line 232) | def self._load(data) # :nodoc:
method recode (line 238) | def recode(type=nil)
method recode! (line 246) | def recode!
method push (line 252) | def push(v)
method dichotomize (line 259) | def dichotomize(low=nil)
method each (line 275) | def each
method each_index (line 280) | def each_index
method add (line 289) | def add(v,update_valid=true)
method set_valid_data (line 308) | def set_valid_data
method set_valid_data_intern (line 318) | def set_valid_data_intern #:nodoc:
method set_valid_data_intern (line 322) | def set_valid_data_intern #:nodoc:
method _set_valid_data_intern (line 326) | def _set_valid_data_intern #:nodoc:
method has_missing_data? (line 340) | def has_missing_data?
method labeling (line 347) | def labeling(x)
method vector_labeled (line 352) | def vector_labeled
method size (line 363) | def size
method [] (line 369) | def [](i)
method []= (line 374) | def []=(i,v)
method is_valid? (line 378) | def is_valid?(x)
method missing_values= (line 383) | def missing_values=(vals)
method today_values= (line 388) | def today_values=(vals)
method type= (line 393) | def type=(t)
method to_a (line 398) | def to_a
method + (line 412) | def +(v)
method - (line 422) | def -(v)
method * (line 426) | def *(v)
method verify (line 431) | def verify
method _vector_ari (line 440) | def _vector_ari(method,v) # :nodoc:
method splitted (line 471) | def splitted(sep=Statsample::SPLIT_TOKEN)
method split_by_separator (line 495) | def split_by_separator(sep=Statsample::SPLIT_TOKEN)
method split_by_separator_freq (line 518) | def split_by_separator_freq(sep=Statsample::SPLIT_TOKEN)
method bootstrap (line 540) | def bootstrap(estimators, nr, s=nil)
method jacknife (line 579) | def jacknife(estimators, k=1)
method prepare_bootstrap (line 617) | def prepare_bootstrap(estimators)
method sample_with_replacement (line 641) | def sample_with_replacement(sample=1)
method sample_without_replacement (line 652) | def sample_without_replacement(sample=1)
method count (line 667) | def count(x=false)
method db_type (line 681) | def db_type(dbs='mysql')
method can_be_date? (line 694) | def can_be_date?
method can_be_scale? (line 703) | def can_be_scale?
method to_s (line 711) | def to_s
method to_matrix (line 716) | def to_matrix(dir=:horizontal)
method inspect (line 724) | def inspect
method factors (line 728) | def factors
method frequencies (line 740) | def frequencies
method frequencies (line 744) | def frequencies #:nodoc:
method _frequencies (line 750) | def _frequencies #:nodoc:
method mode (line 759) | def mode
method n_valid (line 763) | def n_valid
method proportions (line 768) | def proportions
method proportion (line 775) | def proportion(v=1)
method report_building (line 778) | def report_building(b)
method variance_proportion (line 808) | def variance_proportion(n_poblation, v=1)
method variance_total (line 812) | def variance_total(n_poblation, v=1)
method proportion_confidence_interval_t (line 815) | def proportion_confidence_interval_t(n_poblation,margin=0.95,v=1)
method proportion_confidence_interval_z (line 818) | def proportion_confidence_interval_z(n_poblation,margin=0.95,v=1)
method percentil (line 843) | def percentil(q, strategy = :midpoint)
method ranked (line 874) | def ranked(type=:ordinal)
method median (line 885) | def median
method min (line 890) | def min
method max (line 895) | def max
method set_date_data (line 900) | def set_date_data
method set_scale_data (line 916) | def set_scale_data
method range (line 931) | def range;
method sum (line 936) | def sum
method mean (line 941) | def mean
method sum_of_squares (line 949) | def sum_of_squares(m=nil)
method sum_of_squared_deviation (line 955) | def sum_of_squared_deviation
method variance_population (line 961) | def variance_population(m=nil)
method standard_deviation_population (line 970) | def standard_deviation_population(m=nil)
method average_deviation_population (line 978) | def average_deviation_population( m = nil )
method median_absolute_deviation (line 983) | def median_absolute_deviation
method variance_sample (line 989) | def variance_sample(m=nil)
method standard_deviation_sample (line 996) | def standard_deviation_sample(m=nil)
method skew (line 1002) | def skew(m=nil)
method kurtosis (line 1009) | def kurtosis(m=nil)
method product (line 1018) | def product
method histogram (line 1025) | def histogram(bins=10)
method coefficient_of_variation (line 1050) | def coefficient_of_variation
method standard_error (line 1056) | def standard_error
FILE: lib/statsample/vector/gsl.rb
type Statsample (line 1) | module Statsample
class Vector (line 2) | class Vector
type GSL_ (line 3) | module GSL_
function clear_gsl (line 4) | def clear_gsl
function set_valid_data (line 8) | def set_valid_data
function push (line 12) | def push(v)
function gsl (line 20) | def gsl
function vector_standarized_compute (line 25) | def vector_standarized_compute(m,sd)
function vector_centered_compute (line 33) | def vector_centered_compute(m)
function sample_with_replacement (line 40) | def sample_with_replacement(sample=1)
function sample_without_replacement (line 49) | def sample_without_replacement(sample=1)
function median (line 57) | def median
function sum (line 66) | def sum
function mean (line 70) | def mean
function variance_sample (line 74) | def variance_sample(m=nil)
function standard_deviation_sample (line 80) | def standard_deviation_sample(m=nil)
function variance_population (line 86) | def variance_population(m=nil) # :nodoc:
function standard_deviation_population (line 91) | def standard_deviation_population(m=nil) # :nodoc:
function skew (line 96) | def skew # :nodoc:
function kurtosis (line 100) | def kurtosis # :nodoc:
FILE: lib/statsample/version.rb
type Statsample (line 1) | module Statsample
FILE: setup.rb
type Enumerable (line 12) | module Enumerable
function read (line 18) | def File.read(fname)
type Errno (line 26) | module Errno
class ENOTEMPTY (line 27) | class ENOTEMPTY
function binread (line 33) | def File.binread(fname)
function dir? (line 40) | def File.dir?(path)
class ConfigTable (line 45) | class ConfigTable
method initialize (line 49) | def initialize(rbconfig)
method verbose? (line 65) | def verbose?
method no_harm? (line 71) | def no_harm?
method [] (line 75) | def [](key)
method []= (line 79) | def []=(key, val)
method names (line 83) | def names
method each (line 87) | def each(&block)
method key? (line 91) | def key?(name)
method lookup (line 95) | def lookup(name)
method add (line 99) | def add(item)
method remove (line 104) | def remove(name)
method load_script (line 111) | def load_script(path, inst = nil)
method savefile (line 117) | def savefile
method load_savefile (line 121) | def load_savefile
method save (line 132) | def save
method load_standard_entries (line 141) | def load_standard_entries
method standard_entries (line 147) | def standard_entries(rbconfig)
method load_multipackage_entries (line 260) | def load_multipackage_entries
method multipackage_entries (line 266) | def multipackage_entries
method fixup (line 295) | def fixup
method parse_opt (line 304) | def parse_opt(opt)
method dllext (line 309) | def dllext
method value_config? (line 313) | def value_config?(name)
class Item (line 317) | class Item
method initialize (line 318) | def initialize(name, template, default, desc)
method help_opt (line 332) | def help_opt
method value? (line 336) | def value?
method value (line 340) | def value
method resolve (line 344) | def resolve(table)
method set (line 348) | def set(val)
method check (line 354) | def check(val)
class BoolItem (line 360) | class BoolItem < Item
method config_type (line 361) | def config_type
method help_opt (line 365) | def help_opt
method check (line 371) | def check(val)
class PathItem (line 382) | class PathItem < Item
method config_type (line 383) | def config_type
method check (line 389) | def check(path)
class ProgramItem (line 395) | class ProgramItem < Item
method config_type (line 396) | def config_type
class SelectItem (line 401) | class SelectItem < Item
method initialize (line 402) | def initialize(name, selection, default, desc)
method config_type (line 407) | def config_type
method check (line 413) | def check(val)
class ExecItem (line 421) | class ExecItem < Item
method initialize (line 422) | def initialize(name, selection, desc, &block)
method config_type (line 428) | def config_type
method value? (line 432) | def value?
method resolve (line 436) | def resolve(table)
method evaluate (line 442) | def evaluate(val, table)
class PackageSelectionItem (line 451) | class PackageSelectionItem < Item
method initialize (line 452) | def initialize(name, template, default, help_default, desc)
method config_type (line 459) | def config_type
method check (line 465) | def check(val)
class MetaConfigEnvironment (line 473) | class MetaConfigEnvironment
method initialize (line 474) | def initialize(config, installer)
method config_names (line 479) | def config_names
method config? (line 483) | def config?(name)
method bool_config? (line 487) | def bool_config?(name)
method path_config? (line 491) | def path_config?(name)
method value_config? (line 495) | def value_config?(name)
method add_config (line 499) | def add_config(item)
method add_bool_config (line 503) | def add_bool_config(name, default, desc)
method add_path_config (line 507) | def add_path_config(name, default, desc)
method set_config_default (line 511) | def set_config_default(name, default)
method remove_config (line 515) | def remove_config(name)
method packages (line 520) | def packages
method declare_packages (line 526) | def declare_packages(list)
type FileOperations (line 536) | module FileOperations
function mkdir_p (line 538) | def mkdir_p(dirname, prefix = nil)
function rm_f (line 555) | def rm_f(path)
function rm_rf (line 561) | def rm_rf(path)
function remove_tree (line 567) | def remove_tree(path)
function remove_tree0 (line 577) | def remove_tree0(path)
function move_file (line 597) | def move_file(src, dest)
function force_remove_file (line 610) | def force_remove_file(path)
function remove_file (line 617) | def remove_file(path)
function install (line 622) | def install(from, dest, mode, prefix = nil)
function diff? (line 648) | def diff?(new_content, path)
function command (line 653) | def command(*args)
function ruby (line 659) | def ruby(*args)
function make (line 663) | def make(task = nil)
function extdir? (line 667) | def extdir?(dir)
function files_of (line 671) | def files_of(dir)
function directories_of (line 679) | def directories_of(dir)
type HookScriptAPI (line 689) | module HookScriptAPI
function get_config (line 691) | def get_config(key)
function set_config (line 698) | def set_config(key, val)
function curr_srcdir (line 706) | def curr_srcdir
function curr_objdir (line 710) | def curr_objdir
function srcfile (line 714) | def srcfile(path)
function srcexist? (line 718) | def srcexist?(path)
function srcdirectory? (line 722) | def srcdirectory?(path)
function srcfile? (line 726) | def srcfile?(path)
function srcentries (line 730) | def srcentries(path = '.')
function srcfiles (line 736) | def srcfiles(path = '.')
function srcdirectories (line 742) | def srcdirectories(path = '.')
class ToplevelInstaller (line 751) | class ToplevelInstaller
method invoke (line 767) | def ToplevelInstaller.invoke
method multipackage? (line 776) | def ToplevelInstaller.multipackage?
method load_rbconfig (line 780) | def ToplevelInstaller.load_rbconfig
method initialize (line 791) | def initialize(ardir_root, config)
method config (line 798) | def config(key)
method inspect (line 802) | def inspect
method invoke (line 806) | def invoke
method run_metaconfigs (line 830) | def run_metaconfigs
method init_installers (line 834) | def init_installers
method srcdir_root (line 842) | def srcdir_root
method objdir_root (line 846) | def objdir_root
method relpath (line 850) | def relpath
method parsearg_global (line 858) | def parsearg_global
method valid_task? (line 884) | def valid_task?(t)
method valid_task_re (line 888) | def valid_task_re
method parsearg_no_options (line 892) | def parsearg_no_options
method parsearg_config (line 905) | def parsearg_config
method parsearg_install (line 931) | def parsearg_install
method print_usage (line 948) | def print_usage(out)
method exec_config (line 990) | def exec_config
method exec_setup (line 995) | def exec_setup
method exec_install (line 999) | def exec_install
method exec_test (line 1003) | def exec_test
method exec_show (line 1007) | def exec_show
method exec_clean (line 1013) | def exec_clean
method exec_distclean (line 1017) | def exec_distclean
class ToplevelInstallerMulti (line 1024) | class ToplevelInstallerMulti < ToplevelInstaller
method initialize (line 1028) | def initialize(ardir_root, config)
method run_metaconfigs (line 1035) | def run_metaconfigs
method packages= (line 1044) | def packages=(list)
method init_installers (line 1053) | def init_installers
method extract_selection (line 1068) | def extract_selection(list)
method print_usage (line 1076) | def print_usage(f)
method exec_config (line 1087) | def exec_config
method exec_setup (line 1094) | def exec_setup
method exec_install (line 1100) | def exec_install
method exec_test (line 1106) | def exec_test
method exec_clean (line 1112) | def exec_clean
method exec_distclean (line 1119) | def exec_distclean
method each_selected_installers (line 1130) | def each_selected_installers
method run_hook (line 1141) | def run_hook(id)
method verbose? (line 1146) | def verbose?
method no_harm? (line 1151) | def no_harm?
class Installer (line 1158) | class Installer
method initialize (line 1165) | def initialize(config, srcroot, objroot)
method inspect (line 1172) | def inspect
method noop (line 1176) | def noop(rel)
method srcdir_root (line 1183) | def srcdir_root
method objdir_root (line 1187) | def objdir_root
method relpath (line 1191) | def relpath
method verbose? (line 1200) | def verbose?
method no_harm? (line 1205) | def no_harm?
method verbose_off (line 1209) | def verbose_off
method exec_config (line 1222) | def exec_config
method config_dir_ext (line 1229) | def config_dir_ext(rel)
method extconf (line 1237) | def extconf
method exec_setup (line 1245) | def exec_setup
method setup_dir_bin (line 1249) | def setup_dir_bin(rel)
method setup_dir_ext (line 1257) | def setup_dir_ext(rel)
method update_shebang_line (line 1265) | def update_shebang_line(path)
method new_shebang (line 1287) | def new_shebang(old)
method open_atomic_writer (line 1298) | def open_atomic_writer(path, &block)
class Shebang (line 1308) | class Shebang
method load (line 1309) | def Shebang.load(path)
method parse (line 1318) | def Shebang.parse(line)
method initialize (line 1323) | def initialize(cmd, args = [])
method to_s (line 1331) | def to_s
method exec_install (line 1340) | def exec_install
method install_dir_bin (line 1345) | def install_dir_bin(rel)
method install_dir_lib (line 1349) | def install_dir_lib(rel)
method install_dir_ext (line 1353) | def install_dir_ext(rel)
method install_dir_data (line 1360) | def install_dir_data(rel)
method install_dir_conf (line 1364) | def install_dir_conf(rel)
method install_dir_man (line 1370) | def install_dir_man(rel)
method install_files (line 1374) | def install_files(list, dest, mode)
method libfiles (line 1381) | def libfiles
method rubyextentions (line 1385) | def rubyextentions(dir)
method targetfiles (line 1393) | def targetfiles
method mapdir (line 1397) | def mapdir(ents)
method existfiles (line 1415) | def existfiles
method hookfiles (line 1419) | def hookfiles
method glob_select (line 1425) | def glob_select(pat, ents)
method glob_reject (line 1430) | def glob_reject(pats, ents)
method globs2re (line 1442) | def globs2re(pats)
method exec_test (line 1454) | def exec_test
method exec_clean (line 1474) | def exec_clean
method clean_dir_ext (line 1486) | def clean_dir_ext(rel)
method exec_distclean (line 1495) | def exec_distclean
method distclean_dir_ext (line 1504) | def distclean_dir_ext(rel)
method exec_task_traverse (line 1517) | def exec_task_traverse(task)
method traverse (line 1529) | def traverse(task, rel, mid)
method dive_into (line 1540) | def dive_into(rel)
method run_hook (line 1555) | def run_hook(id)
class SetupError (line 1570) | class SetupError < StandardError; end
function setup_rb_error (line 1572) | def setup_rb_error(msg)
FILE: test/fixtures/correlation_matrix.rb
type Statsample (line 2) | module Statsample
type Fixtures (line 3) | module Fixtures
function harman_817 (line 4) | def harman_817
FILE: test/helpers_tests.rb
type MiniTest (line 15) | module MiniTest
class Test (line 16) | class Test
method should_with_gsl (line 20) | def self.should_with_gsl(name,&block)
type Assertions (line 34) | module Assertions
function assert_similar_vector (line 35) | def assert_similar_vector(exp, obs, delta=1e-10,msg=nil)
function assert_equal_vector (line 42) | def assert_equal_vector(exp,obs,delta=1e-10,msg=nil)
function assert_equal_matrix (line 48) | def assert_equal_matrix(exp,obs,delta=1e-10,msg=nil)
function assert_nothing_raised (line 61) | def assert_nothing_raised(msg=nil)
FILE: test/test_analysis.rb
class StatsampleAnalysisTestCase (line 3) | class StatsampleAnalysisTestCase < MiniTest::Unit::TestCase
FILE: test/test_anova_contrast.rb
class StatsampleAnovaContrastTestCase (line 2) | class StatsampleAnovaContrastTestCase < MiniTest::Unit::TestCase
FILE: test/test_anovaoneway.rb
class StatsampleAnovaOneWayTestCase (line 2) | class StatsampleAnovaOneWayTestCase < MiniTest::Unit::TestCase
FILE: test/test_anovatwoway.rb
class StatsampleAnovaTwoWayTestCase (line 2) | class StatsampleAnovaTwoWayTestCase < MiniTest::Unit::TestCase
FILE: test/test_anovatwowaywithdataset.rb
class StatsampleAnovaTwoWayWithVectorsTestCase (line 4) | class StatsampleAnovaTwoWayWithVectorsTestCase < MiniTest::Unit::TestCase
FILE: test/test_anovawithvectors.rb
class StatsampleAnovaOneWayWithVectorsTestCase (line 2) | class StatsampleAnovaOneWayWithVectorsTestCase < MiniTest::Unit::TestCase
FILE: test/test_awesome_print_bug.rb
class StatsampleAwesomePrintBug (line 2) | class StatsampleAwesomePrintBug < MiniTest::Test
FILE: test/test_bartlettsphericity.rb
class StatsampleBartlettSphericityTestCase (line 3) | class StatsampleBartlettSphericityTestCase < MiniTest::Test
FILE: test/test_bivariate.rb
class StatsampleBivariateTestCase (line 2) | class StatsampleBivariateTestCase < MiniTest::Test
FILE: test/test_codification.rb
class StatsampleCodificationTestCase (line 2) | class StatsampleCodificationTestCase < MiniTest::Unit::TestCase
method initialize (line 4) | def initialize(*args)
method test_create_hash (line 10) | def test_create_hash
method test_create_excel (line 17) | def test_create_excel
method test_create_yaml (line 32) | def test_create_yaml
method test_recodification (line 50) | def test_recodification
method test_recode_dataset_simple (line 57) | def test_recode_dataset_simple
method test_recode_dataset_split (line 63) | def test_recode_dataset_split
FILE: test/test_crosstab.rb
class StatsampleCrosstabTestCase (line 2) | class StatsampleCrosstabTestCase < MiniTest::Unit::TestCase
method initialize (line 4) | def initialize(*args)
method test_crosstab_errors (line 10) | def test_crosstab_errors
method test_crosstab_basic (line 24) | def test_crosstab_basic
method test_crosstab_frequencies (line 30) | def test_crosstab_frequencies
method test_summary (line 43) | def test_summary
method test_expected (line 49) | def test_expected
method test_crosstab_with_scale (line 55) | def test_crosstab_with_scale
FILE: test/test_csv.rb
class StatsampleCSVTestCase (line 2) | class StatsampleCSVTestCase < MiniTest::Unit::TestCase
method setup (line 3) | def setup
method test_read (line 6) | def test_read
method test_nil (line 20) | def test_nil
method test_repeated (line 23) | def test_repeated
method test_write (line 29) | def test_write
FILE: test/test_dataset.rb
class StatsampleDatasetTestCase (line 2) | class StatsampleDatasetTestCase < MiniTest::Unit::TestCase
method setup (line 3) | def setup
method test_nest (line 8) | def test_nest
method test_should_have_summary (line 20) | def test_should_have_summary
method test_basic (line 23) | def test_basic
method test_saveload (line 27) | def test_saveload
method test_gsl (line 33) | def test_gsl
method test_matrix (line 42) | def test_matrix
method test_fields (line 48) | def test_fields
method test_merge (line 54) | def test_merge
method test_each_vector (line 72) | def test_each_vector
method test_equality (line 89) | def test_equality
method test_add_vector (line 100) | def test_add_vector
method test_vector_by_calculation (line 109) | def test_vector_by_calculation
method test_vector_sum (line 120) | def test_vector_sum
method test_vector_missing_values (line 136) | def test_vector_missing_values
method test_has_missing_values (line 147) | def test_has_missing_values
method test_vector_count_characters (line 160) | def test_vector_count_characters
method test_vector_mean (line 171) | def test_vector_mean
method test_each_array (line 192) | def test_each_array
method test_recode (line 200) | def test_recode
method test_case_as (line 206) | def test_case_as
method test_delete_vector (line 216) | def test_delete_vector
method test_change_type (line 221) | def test_change_type
method test_split_by_separator_recode (line 225) | def test_split_by_separator_recode
method test_split_by_separator (line 235) | def test_split_by_separator
method test_percentiles (line 242) | def test_percentiles
method test_add_case (line 254) | def test_add_case
method test_marshaling (line 268) | def test_marshaling
method test_range (line 272) | def test_range
method test_clone (line 285) | def test_clone
method test_dup (line 311) | def test_dup
method test_from_to (line 348) | def test_from_to
method test_each_array_with_nils (line 354) | def test_each_array_with_nils
method test_dup_only_valid (line 367) | def test_dup_only_valid
method test_filter (line 381) | def test_filter
method test_filter_field (line 389) | def test_filter_field
method test_verify (line 396) | def test_verify
method test_compute_operation (line 416) | def test_compute_operation
method test_crosstab_with_asignation (line 429) | def test_crosstab_with_asignation
method test_one_to_many (line 444) | def test_one_to_many
FILE: test/test_dominance_analysis.rb
class StatsampleDominanceAnalysisTestCase (line 2) | class StatsampleDominanceAnalysisTestCase < MiniTest::Unit::TestCase
method test_dominance_univariate (line 3) | def test_dominance_univariate
method test_dominance_multivariate (line 25) | def test_dominance_multivariate
FILE: test/test_factor.rb
class StatsampleFactorTestCase (line 5) | class StatsampleFactorTestCase < MiniTest::Unit::TestCase
method setup (line 8) | def setup
method test_covariance_matrix (line 12) | def test_covariance_matrix
method test_principalcomponents_ruby_gsl (line 36) | def test_principalcomponents_ruby_gsl
method test_principalcomponents (line 80) | def test_principalcomponents()
method principalcomponents (line 85) | def principalcomponents(gsl)
method test_antiimage (line 114) | def test_antiimage
method test_kmo (line 120) | def test_kmo
method test_kmo_univariate (line 132) | def test_kmo_univariate
method test_pca (line 140) | def test_pca
method pca_set (line 156) | def pca_set(pca,type)
method test_principalaxis (line 175) | def test_principalaxis
method test_rotation_varimax (line 202) | def test_rotation_varimax
FILE: test/test_factor_map.rb
class StatsampleFactorMpaTestCase (line 5) | class StatsampleFactorMpaTestCase < MiniTest::Unit::TestCase
method map_assertions (line 34) | def map_assertions(map)
FILE: test/test_factor_pa.rb
class StatsampleFactorTestCase (line 5) | class StatsampleFactorTestCase < MiniTest::Unit::TestCase
method setup (line 8) | def setup
method test_parallelanalysis_with_data (line 11) | def test_parallelanalysis_with_data
method test_parallelanalysis (line 45) | def test_parallelanalysis
FILE: test/test_ggobi.rb
class StatsampleGGobiTestCase (line 3) | class StatsampleGGobiTestCase < MiniTest::Unit::TestCase
method setup (line 5) | def setup
method test_values_definition (line 12) | def test_values_definition
method test_variable_definition (line 16) | def test_variable_definition
FILE: test/test_gsl.rb
class StatsampleGSLTestCase (line 2) | class StatsampleGSLTestCase < MiniTest::Unit::TestCase
FILE: test/test_histogram.rb
class StatsampleHistogramTestCase (line 4) | class StatsampleHistogramTestCase < MiniTest::Unit::TestCase
FILE: test/test_matrix.rb
class StatsampleMatrixTestCase (line 3) | class StatsampleMatrixTestCase < MiniTest::Unit::TestCase
method test_to_dataset (line 5) | def test_to_dataset
method test_covariate (line 22) | def test_covariate
FILE: test/test_multiset.rb
class StatsampleMultisetTestCase (line 4) | class StatsampleMultisetTestCase < MiniTest::Unit::TestCase
method setup (line 5) | def setup
method test_creation (line 12) | def test_creation
method test_creation_empty (line 33) | def test_creation_empty
method test_to_multiset_by_split_one (line 44) | def test_to_multiset_by_split_one
method test_to_multiset_by_split_multiple (line 57) | def test_to_multiset_by_split_multiple
method test_stratum_proportion (line 70) | def test_stratum_proportion
method test_stratum_scale (line 83) | def test_stratum_scale
method test_each (line 99) | def test_each
method test_multiset_union_with_block (line 123) | def test_multiset_union_with_block
method test_multiset_union (line 140) | def test_multiset_union
FILE: test/test_regression.rb
class StatsampleRegressionTestCase (line 3) | class StatsampleRegressionTestCase < MiniTest::Unit::TestCase
method test_parameters (line 40) | def test_parameters
method _test_simple_regression (line 52) | def _test_simple_regression(reg)
method test_summaries (line 60) | def test_summaries
method test_multiple_dependent (line 68) | def test_multiple_dependent
method test_multiple_regression_pairwise_2 (line 91) | def test_multiple_regression_pairwise_2
method test_multiple_regression_gsl (line 107) | def test_multiple_regression_gsl
method model_test_matrix (line 134) | def model_test_matrix(lr,name='undefined')
method model_test (line 175) | def model_test(lr,name='undefined')
method test_regression_matrix (line 182) | def test_regression_matrix
method test_regression_rubyengine (line 201) | def test_regression_rubyengine
FILE: test/test_reliability.rb
class StatsampleReliabilityTestCase (line 2) | class StatsampleReliabilityTestCase < MiniTest::Unit::TestCase
FILE: test/test_reliability_icc.rb
class StatsampleReliabilityIccTestCase (line 5) | class StatsampleReliabilityIccTestCase < MiniTest::Test
FILE: test/test_reliability_skillscale.rb
class StatsampleReliabilitySkillScaleTestCase (line 4) | class StatsampleReliabilitySkillScaleTestCase < MiniTest::Unit::TestCase
FILE: test/test_resample.rb
class StatsampleResampleTestCase (line 3) | class StatsampleResampleTestCase < MiniTest::Unit::TestCase
method initialize (line 4) | def initialize(*args)
method test_basic (line 7) | def test_basic
method test_repeat_and_save (line 13) | def test_repeat_and_save
FILE: test/test_rserve_extension.rb
class StatsampleRserveExtensionTestCase (line 6) | class StatsampleRserveExtensionTestCase < MiniTest::Unit::TestCase
FILE: test/test_srs.rb
class StatsampleSrsTestCase (line 3) | class StatsampleSrsTestCase < MiniTest::Unit::TestCase
method test_std_error (line 4) | def test_std_error
FILE: test/test_statistics.rb
class StatsampleStatisicsTestCase (line 2) | class StatsampleStatisicsTestCase < MiniTest::Unit::TestCase
method initialize (line 4) | def initialize(*args)
method test_p_using_cdf (line 7) | def test_p_using_cdf
method test_recode_repeated (line 16) | def test_recode_repeated
method test_is_number (line 21) | def test_is_number
method test_estimation_mean (line 33) | def test_estimation_mean
method test_estimation_proportion (line 39) | def test_estimation_proportion
method test_ml (line 55) | def test_ml
method test_simple_linear_regression (line 66) | def test_simple_linear_regression
FILE: test/test_stest.rb
class StatsampleTestTestCase (line 3) | class StatsampleTestTestCase < MiniTest::Unit::TestCase
method test_chi_square_matrix_with_expected (line 4) | def test_chi_square_matrix_with_expected
method test_chi_square_matrix_only_observed (line 14) | def test_chi_square_matrix_only_observed
method test_u_mannwhitney (line 27) | def test_u_mannwhitney
method test_levene (line 38) | def test_levene
method test_levene_dataset (line 44) | def test_levene_dataset
method assert_levene (line 51) | def assert_levene(levene)
FILE: test/test_stratified.rb
class StatsampleStratifiedTestCase (line 3) | class StatsampleStratifiedTestCase < MiniTest::Unit::TestCase
method initialize (line 5) | def initialize(*args)
method test_mean (line 8) | def test_mean
FILE: test/test_test_f.rb
class StatsampleTestFTestCase (line 2) | class StatsampleTestFTestCase < MiniTest::Unit::TestCase
FILE: test/test_test_kolmogorovsmirnov.rb
class StatsampleTestKolmogorovSmirnovTestCase (line 2) | class StatsampleTestKolmogorovSmirnovTestCase < MiniTest::Unit::TestCase
FILE: test/test_test_t.rb
class StatsampleTestTTestCase (line 2) | class StatsampleTestTTestCase < MiniTest::Unit::TestCase
FILE: test/test_umannwhitney.rb
class StatsampleUMannWhitneyTestCase (line 3) | class StatsampleUMannWhitneyTestCase < MiniTest::Unit::TestCase
FILE: test/test_vector.rb
class StatsampleTestVector (line 3) | class StatsampleTestVector < MiniTest::Unit::TestCase
method setup (line 6) | def setup
method assert_counting_tokens (line 11) | def assert_counting_tokens(b)
method test_nominal (line 334) | def test_nominal
method test_equality (line 345) | def test_equality
method test_vector_percentil (line 359) | def test_vector_percentil
method test_ordinal (line 367) | def test_ordinal
method test_linear_percentil_strategy (line 381) | def test_linear_percentil_strategy
method test_ranked (line 396) | def test_ranked
method test_scale (line 404) | def test_scale
method test_vector_centered (line 415) | def test_vector_centered
method test_vector_standarized (line 425) | def test_vector_standarized
method test_vector_standarized_with_zero_variance (line 435) | def test_vector_standarized_with_zero_variance
method test_check_type (line 441) | def test_check_type
method test_add (line 467) | def test_add
method test_minus (line 483) | def test_minus
method test_sum_of_squares (line 499) | def test_sum_of_squares
method test_average_deviation (line 503) | def test_average_deviation
method test_samples (line 507) | def test_samples
method test_valid_data (line 520) | def test_valid_data
method test_set_value (line 536) | def test_set_value
method test_gsl (line 541) | def test_gsl
method test_vector_matrix (line 566) | def test_vector_matrix
method test_marshalling (line 573) | def test_marshalling
method test_dup (line 578) | def test_dup
method test_paired_ties (line 605) | def test_paired_ties
method test_dichotomize (line 610) | def test_dichotomize
method test_can_be_methods (line 624) | def test_can_be_methods
method test_date_vector (line 637) | def test_date_vector
FILE: test/test_wilcoxonsignedrank.rb
class StatsampleUMannWhitneyTestCase (line 3) | class StatsampleUMannWhitneyTestCase < MiniTest::Unit::TestCase
FILE: test/test_xls.rb
class StatsampleExcelTestCase (line 2) | class StatsampleExcelTestCase < MiniTest::Unit::TestCase
Condensed preview — 157 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (697K chars).
[
{
"path": ".gitignore",
"chars": 133,
"preview": "doc.yaml\n*.swp\n*.rbc\ncoverage\n*~\nagregar_adsense_a_doc.rb\npkg\ndoc\n.yardoc\nexamples/images/*\nexamples/*.html\nweb/upload_t"
},
{
"path": ".travis.yml",
"chars": 287,
"preview": "language:\n ruby\n\nrvm:\n - '1.9.3'\n - '2.0.0'\n - '2.1.1'\n\nscript:\n bundle exec rake test\n \nbefore_install:\n - sudo "
},
{
"path": "Gemfile",
"chars": 385,
"preview": "source \"https://www.rubygems.org\"\ngem 'minitest'\ngem 'rdoc'\ngem 'mocha', '0.14.0' #:require=>'mocha/setup'\ngem 'shoulda'"
},
{
"path": "History.txt",
"chars": 21789,
"preview": "=== 1.4.0 / 2014-10-11\n * Replaced README.txt for README.md\n * Replace File.exists? for File.exist?\n + New Dataset.jo"
},
{
"path": "LICENSE.txt",
"chars": 1490,
"preview": "Copyright (c) 2009-2014, Claudio Bustos\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or"
},
{
"path": "Manifest.txt",
"chars": 4617,
"preview": ".travis.yml\nGemfile\nGemfile.lock\nHistory.txt\nLICENSE.txt\nManifest.txt\nREADME.md\nRakefile\nbenchmarks/correlation_matrix_1"
},
{
"path": "README.md",
"chars": 9534,
"preview": "# Statsample\n\nHomepage :: https://github.com/sciruby/statsample\n\n[+'/lib/')\n\nrequire 'rubygems'\nre"
},
{
"path": "benchmarks/correlation_matrix_15_variables.rb",
"chars": 883,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))\n\nextend BenchPress\ncases=250\nvars=20\n\n\nname \"g"
},
{
"path": "benchmarks/correlation_matrix_5_variables.rb",
"chars": 883,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))\n\nextend BenchPress\ncases=500\nvars=5\n\n\nname \"gs"
},
{
"path": "benchmarks/correlation_matrix_methods/correlation_matrix.html",
"chars": 4065,
"preview": "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\n<html>\n<head>\n<meta http-equi"
},
{
"path": "benchmarks/correlation_matrix_methods/correlation_matrix.rb",
"chars": 2278,
"preview": "# This test create a database to adjust the best algorithm\n# to use on correlation matrix\nrequire(File.expand_path(File."
},
{
"path": "benchmarks/factor_map.rb",
"chars": 1043,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))\n\nextend BenchPress\n\n\nname \"Statsample::Factor:"
},
{
"path": "benchmarks/helpers_benchmark.rb",
"chars": 164,
"preview": "$:.unshift(File.expand_path(File.dirname(__FILE__)+'/../lib/'))\n$:.unshift(File.expand_path(File.dirname(__FILE__)+'/'))"
},
{
"path": "doc_latex/manual/equations.tex",
"chars": 1903,
"preview": "\\part{Equations}\n\\section{Convention}\n\\begin{align*}\nn &= \\text{sample size}\\\\\nN &= \\text{population size}\\\\\np &= \\text{"
},
{
"path": "examples/boxplot.rb",
"chars": 374,
"preview": "#!/usr/bin/ruby\n$:.unshift(File.dirname(__FILE__)+'/../lib/')\nrequire 'statsample'\nStatsample::Analysis.store(Statsample"
},
{
"path": "examples/correlation_matrix.rb",
"chars": 376,
"preview": "#!/usr/bin/ruby\n$:.unshift(File.dirname(__FILE__)+'/../lib/')\nrequire 'statsample'\n\nStatsample::Analysis.store(\"Statsamp"
},
{
"path": "examples/dataset.rb",
"chars": 394,
"preview": "#!/usr/bin/ruby\n$:.unshift(File.dirname(__FILE__)+'/../lib/')\nrequire 'statsample'\n\nStatsample::Analysis.store(Statsampl"
},
{
"path": "examples/dominance_analysis.rb",
"chars": 634,
"preview": "#!/usr/bin/ruby\n$:.unshift(File.dirname(__FILE__)+'/../lib/')\n\nrequire 'statsample'\n\n\nStatsample::Analysis.store(Statsam"
},
{
"path": "examples/dominance_analysis_bootstrap.rb",
"chars": 728,
"preview": "#!/usr/bin/ruby\n$:.unshift(File.dirname(__FILE__)+'/../lib/')\nrequire 'statsample'\n\nStatsample::Analysis.store(Statsampl"
},
{
"path": "examples/histogram.rb",
"chars": 228,
"preview": "#!/usr/bin/ruby\n$:.unshift(File.dirname(__FILE__)+'/../lib/')\nrequire 'statsample'\n\nStatsample::Analysis.store(Statsampl"
},
{
"path": "examples/icc.rb",
"chars": 538,
"preview": "#!/usr/bin/ruby\n$:.unshift(File.dirname(__FILE__)+'/../lib/')\n\nrequire 'statsample'\n\nStatsample::Analysis.store(Statsamp"
},
{
"path": "examples/levene.rb",
"chars": 312,
"preview": "#!/usr/bin/ruby\n$:.unshift(File.dirname(__FILE__)+'/../lib/')\n\nrequire 'statsample'\n\nStatsample::Analysis.store(Statsamp"
},
{
"path": "examples/multiple_regression.rb",
"chars": 393,
"preview": "#!/usr/bin/ruby\n$:.unshift(File.dirname(__FILE__)+'/../lib/')\n\nrequire 'statsample'\n\nStatsample::Analysis.store(Statsamp"
},
{
"path": "examples/multivariate_correlation.rb",
"chars": 964,
"preview": "#!/usr/bin/ruby\n$:.unshift(File.dirname(__FILE__)+'/../lib/')\n\nrequire 'statsample'\nrequire 'mathn'\n\n\nStatsample::Analys"
},
{
"path": "examples/parallel_analysis.rb",
"chars": 883,
"preview": "#!/usr/bin/ruby\n$:.unshift(File.dirname(__FILE__)+'/../lib/')\n\nrequire 'statsample'\nsamples=150\nvariables=30\niterations="
},
{
"path": "examples/polychoric.rb",
"chars": 876,
"preview": "#!/usr/bin/ruby\n$:.unshift(File.dirname(__FILE__)+'/../lib/')\n$:.unshift(\"/home/cdx/usr/lib/statsample-bivariate-extensi"
},
{
"path": "examples/principal_axis.rb",
"chars": 630,
"preview": "#!/usr/bin/ruby\n$:.unshift(File.dirname(__FILE__)+'/../lib/')\n\nrequire 'statsample'\n\nStatsample::Analysis.store(Statsamp"
},
{
"path": "examples/reliability.rb",
"chars": 687,
"preview": "#!/usr/bin/ruby\n$:.unshift(File.dirname(__FILE__)+'/../lib')\nrequire 'statsample'\n\nStatsample::Analysis.store(Statsample"
},
{
"path": "examples/scatterplot.rb",
"chars": 327,
"preview": "#!/usr/bin/ruby\n$:.unshift(File.dirname(__FILE__)+'/../lib/')\n$:.unshift('/home/cdx/dev/reportbuilder/lib/')\n\nrequire 'b"
},
{
"path": "examples/t_test.rb",
"chars": 364,
"preview": "#!/usr/bin/ruby\n$:.unshift(File.dirname(__FILE__)+'/../lib')\nrequire 'statsample'\n\nStatsample::Analysis.store(Statsample"
},
{
"path": "examples/tetrachoric.rb",
"chars": 261,
"preview": "#!/usr/bin/ruby\n$:.unshift(File.dirname(__FILE__)+'/../lib/')\n\nrequire 'statsample'\n\nStatsample::Analysis.store(Statsamp"
},
{
"path": "examples/u_test.rb",
"chars": 344,
"preview": "#!/usr/bin/ruby\n$:.unshift(File.dirname(__FILE__)+'/../lib')\nrequire 'statsample'\n\nStatsample::Analysis.store(Statsample"
},
{
"path": "examples/vector.rb",
"chars": 309,
"preview": "#!/usr/bin/ruby\n$:.unshift(File.dirname(__FILE__)+'/../lib/')\n\nrequire 'statsample'\n\nStatsample::Analysis.store(Statsamp"
},
{
"path": "examples/velicer_map_test.rb",
"chars": 839,
"preview": "#!/usr/bin/ruby\n$:.unshift(File.dirname(__FILE__)+'/../lib/')\n\nrequire 'statsample'\n\nStatsample::Analysis.store(Statsamp"
},
{
"path": "grab_references.rb",
"chars": 452,
"preview": "#!/usr/bin/env ruby1.9\nrequire 'reportbuilder'\nrefs=[]\nDir.glob \"**/*.rb\" do |f|\n next if f=~/pkg/\n\treference=false\n\tFi"
},
{
"path": "lib/spss.rb",
"chars": 4290,
"preview": "# = spss.rb - \n#\n# Provides utilites for working with spss files\n#\n# Copyright (C) 2009 Claudio Bustos\n#\n# Claudio Busto"
},
{
"path": "lib/statsample/analysis/suite.rb",
"chars": 2301,
"preview": "module Statsample\n module Analysis\n class Suite \n include Statsample::Shorthand\n attr_accessor :output\n "
},
{
"path": "lib/statsample/analysis/suitereportbuilder.rb",
"chars": 904,
"preview": "module Statsample\n module Analysis\n class SuiteReportBuilder < Suite\n attr_accessor :rb\n def initialize(op"
},
{
"path": "lib/statsample/analysis.rb",
"chars": 3299,
"preview": "require 'statsample/analysis/suite'\nrequire 'statsample/analysis/suitereportbuilder'\n\nmodule Statsample\n # DSL to creat"
},
{
"path": "lib/statsample/anova/contrast.rb",
"chars": 2402,
"preview": "module Statsample\n module Anova\n class Contrast\n attr_reader :psi\n\n attr_reader :msw\n include Summari"
},
{
"path": "lib/statsample/anova/oneway.rb",
"chars": 6381,
"preview": "module Statsample\n module Anova\n # = Generic Anova one-way.\n # You could enter the sum of squares or the mean squ"
},
{
"path": "lib/statsample/anova/twoway.rb",
"chars": 7683,
"preview": "module Statsample\n module Anova\n # = Generic Anova two-way.\n # You could enter the sum of squares or the mean squ"
},
{
"path": "lib/statsample/anova.rb",
"chars": 488,
"preview": "module Statsample\n module Anova\n class << self\n def oneway(*args)\n OneWay.new(*args)\n end \n "
},
{
"path": "lib/statsample/bivariate/pearson.rb",
"chars": 1610,
"preview": "module Statsample\n module Bivariate\n # = Pearson correlation coefficient (r) \n # \n # The moment-product Pearso"
},
{
"path": "lib/statsample/bivariate.rb",
"chars": 12846,
"preview": "require 'statsample/bivariate/pearson'\nmodule Statsample\n # Diverse methods and classes to calculate bivariate relation"
},
{
"path": "lib/statsample/codification.rb",
"chars": 6242,
"preview": "require 'yaml'\n\nmodule Statsample\n # This module aids to code open questions\n # * Select one or more vectors of a data"
},
{
"path": "lib/statsample/converter/csv.rb",
"chars": 2371,
"preview": "module Statsample\n class CSV < SpreadsheetBase\n if RUBY_VERSION<\"1.9\"\n require 'fastercsv'\n CSV_klass=::Fa"
},
{
"path": "lib/statsample/converter/spss.rb",
"chars": 1230,
"preview": "module Statsample\n module SPSS\n class << self\n # Export a SPSS Matrix with tetrachoric correlations .\n #\n "
},
{
"path": "lib/statsample/converters.rb",
"chars": 10702,
"preview": "require 'statsample/converter/spss'\nmodule Statsample\n # Create and dumps Datasets on a database\n module Database\n "
},
{
"path": "lib/statsample/crosstab.rb",
"chars": 5550,
"preview": "module Statsample\n\t# Class to create crosstab of data\n\t# With this, you can create reports and do chi square test\n\t# The"
},
{
"path": "lib/statsample/dataset.rb",
"chars": 29351,
"preview": "require 'statsample/vector'\n\nclass Hash\n # Creates a Statsample::Dataset based on a Hash \n def to_dataset(*args)\n S"
},
{
"path": "lib/statsample/dominanceanalysis/bootstrap.rb",
"chars": 10104,
"preview": "module Statsample\n class DominanceAnalysis\n # == Goal\n # Generates Bootstrap sample to identity the replicability"
},
{
"path": "lib/statsample/dominanceanalysis.rb",
"chars": 13832,
"preview": "module Statsample\n # Dominance Analysis is a procedure based on an examination of the R<sup>2</sup> values\n # for all "
},
{
"path": "lib/statsample/factor/map.rb",
"chars": 4711,
"preview": "module Statsample\n module Factor\n # = Velicer's Minimum Average Partial\n # \n # \"Velicer’s (1976) MAP test involves a"
},
{
"path": "lib/statsample/factor/parallelanalysis.rb",
"chars": 7807,
"preview": "module Statsample\n module Factor\n # Performs Horn's 'parallel analysis' to a principal components analysis\n # to "
},
{
"path": "lib/statsample/factor/pca.rb",
"chars": 7590,
"preview": "# encoding: UTF-8\nmodule Statsample\nmodule Factor\n # Principal Component Analysis (PCA) of a covariance or \n # correla"
},
{
"path": "lib/statsample/factor/principalaxis.rb",
"chars": 7210,
"preview": "module Statsample\nmodule Factor\n # Principal Axis Analysis for a covariance or correlation matrix. \n #\n # For PCA, us"
},
{
"path": "lib/statsample/factor/rotation.rb",
"chars": 5873,
"preview": "module Statsample\nmodule Factor\n # Base class for component matrix rotation.\n #\n # == Reference:\n # * SPSS Manual\n "
},
{
"path": "lib/statsample/factor.rb",
"chars": 3368,
"preview": "require 'statsample/factor/rotation'\nrequire 'statsample/factor/pca'\nrequire 'statsample/factor/principalaxis'\nrequire '"
},
{
"path": "lib/statsample/graph/boxplot.rb",
"chars": 8147,
"preview": "require 'rubyvis'\nmodule Statsample\n module Graph\n # = Boxplot\n # \n # From Wikipedia:\n # In descriptive sta"
},
{
"path": "lib/statsample/graph/histogram.rb",
"chars": 6652,
"preview": "require 'rubyvis'\nmodule Statsample\n module Graph\n \n # In statistics, a histogram is a graphical representation, "
},
{
"path": "lib/statsample/graph/scatterplot.rb",
"chars": 6627,
"preview": "require 'rubyvis'\nmodule Statsample\n module Graph\n # = Scatterplot\n # \n # From Wikipedia:\n # A scatter plot"
},
{
"path": "lib/statsample/graph.rb",
"chars": 297,
"preview": "require 'statsample/graph/scatterplot'\nrequire 'statsample/graph/boxplot'\nrequire 'statsample/graph/histogram'\nmodule St"
},
{
"path": "lib/statsample/histogram.rb",
"chars": 4886,
"preview": "module Statsample\n # A histogram consists of a set of bins which count the \n # number of events falling into a given r"
},
{
"path": "lib/statsample/matrix.rb",
"chars": 7095,
"preview": "class ::Vector\n def to_matrix\n ::Matrix.columns([self.to_a])\n end\n def to_vector\n self\n end\nend\nclass ::Matrix"
},
{
"path": "lib/statsample/multiset.rb",
"chars": 8693,
"preview": "module Statsample\n # Multiset joins multiple dataset with the same fields and vectors\n # but with different number of "
},
{
"path": "lib/statsample/regression/multiple/alglibengine.rb",
"chars": 2958,
"preview": "if HAS_ALGIB\nmodule Statsample\nmodule Regression\nmodule Multiple\n# Class for Multiple Regression Analysis\n# Requires Alg"
},
{
"path": "lib/statsample/regression/multiple/baseengine.rb",
"chars": 7535,
"preview": "module Statsample\n module Regression\n module Multiple\n # Base class for Multiple Regression Engines\n class"
},
{
"path": "lib/statsample/regression/multiple/gslengine.rb",
"chars": 3955,
"preview": "if Statsample.has_gsl?\n module Statsample\n module Regression\n module Multiple\n # Class for Multiple Regr"
},
{
"path": "lib/statsample/regression/multiple/matrixengine.rb",
"chars": 5782,
"preview": "module Statsample\nmodule Regression\nmodule Multiple\n # Pure Ruby Class for Multiple Regression Analysis, based on a cov"
},
{
"path": "lib/statsample/regression/multiple/rubyengine.rb",
"chars": 2391,
"preview": "module Statsample\nmodule Regression\nmodule Multiple\n# Pure Ruby Class for Multiple Regression Analysis.\n# Slower than Al"
},
{
"path": "lib/statsample/regression/multiple.rb",
"chars": 3197,
"preview": "require 'statsample/regression/multiple/baseengine'\nmodule Statsample\n module Regression\n # Module for OLS Multiple "
},
{
"path": "lib/statsample/regression/simple.rb",
"chars": 3381,
"preview": "module Statsample\n module Regression\n # Class for calculation of linear regressions with form\n # y = a+bx\n #"
},
{
"path": "lib/statsample/regression.rb",
"chars": 2458,
"preview": "require 'statsample/regression/simple'\nrequire 'statsample/regression/multiple'\n\nrequire 'statsample/regression/multiple"
},
{
"path": "lib/statsample/reliability/icc.rb",
"chars": 12070,
"preview": "module Statsample\n module Reliability\n # = Intra-class correlation\n # According to Shrout & Fleiss (1979, p.422):"
},
{
"path": "lib/statsample/reliability/multiscaleanalysis.rb",
"chars": 6348,
"preview": "module Statsample\n module Reliability\n # DSL for analysis of multiple scales analysis. \n # Retrieves reliability "
},
{
"path": "lib/statsample/reliability/scaleanalysis.rb",
"chars": 8718,
"preview": "module Statsample\n module Reliability\n # Analysis of a Scale. Analoge of Scale Reliability analysis on SPSS.\n # R"
},
{
"path": "lib/statsample/reliability/skillscaleanalysis.rb",
"chars": 3600,
"preview": "module Statsample\n module Reliability\n # Analysis of a Skill Scale\n # Given a dataset with results and a correct "
},
{
"path": "lib/statsample/reliability.rb",
"chars": 4798,
"preview": "module Statsample\n module Reliability\n class << self\n # Calculate Chonbach's alpha for a given dataset.\n #"
},
{
"path": "lib/statsample/resample.rb",
"chars": 347,
"preview": "module Statsample\n module Resample\n class << self\n def repeat_and_save(times,&action)\n (1..times).inject"
},
{
"path": "lib/statsample/rserve_extension.rb",
"chars": 429,
"preview": "# Several additions to Statsample objects, to support\n# rserve-client\n\nmodule Statsample\n class Vector\n def to_REXP\n"
},
{
"path": "lib/statsample/shorthand.rb",
"chars": 3432,
"preview": "class Object\n # Shorthand for Statsample::Analysis.store(*args,&block)\n def ss_analysis(*args,&block)\n Statsample::"
},
{
"path": "lib/statsample/srs.rb",
"chars": 6030,
"preview": "module Statsample\n\t# Several methods to estimate parameters for simple random sampling\n # == Reference: \n # * Cochran,"
},
{
"path": "lib/statsample/test/bartlettsphericity.rb",
"chars": 1507,
"preview": "module Statsample\n module Test\n # == Bartlett's test of Sphericity.\n # Test the hyphotesis that the sample correl"
},
{
"path": "lib/statsample/test/chisquare.rb",
"chars": 1306,
"preview": "module Statsample\n module Test\n module ChiSquare\n class WithMatrix\n attr_reader :df\n attr_reader "
},
{
"path": "lib/statsample/test/f.rb",
"chars": 1913,
"preview": "module Statsample\n module Test\n # From Wikipedia:\n # An F-test is any statistical test in which the test statisti"
},
{
"path": "lib/statsample/test/kolmogorovsmirnov.rb",
"chars": 1748,
"preview": "module Statsample\n module Test\n # == Kolmogorov-Smirnov's test of equality of distributions.\n class KolmogorovSmi"
},
{
"path": "lib/statsample/test/levene.rb",
"chars": 2780,
"preview": "module Statsample\n module Test\n # = Levene Test for Equality of Variances\n # From NIST/SEMATECH:\n # <blockquot"
},
{
"path": "lib/statsample/test/t.rb",
"chars": 11518,
"preview": "module Statsample\n module Test\n \n \n \n \n # A t-test is any statistical hypothesis test in which the test "
},
{
"path": "lib/statsample/test/umannwhitney.rb",
"chars": 6393,
"preview": "module Statsample\n module Test\n #\n # = U Mann-Whitney test\n #\n # Non-parametric test for assessing whether "
},
{
"path": "lib/statsample/test/wilcoxonsignedrank.rb",
"chars": 2719,
"preview": "module Statsample\n module Test\n # From Wikipedia:\n # The Wilcoxon signed-rank test is a non-parametric statistica"
},
{
"path": "lib/statsample/test.rb",
"chars": 2873,
"preview": "module Statsample\n # Module for several statistical tests\n \n module Test\n autoload(:UMannWhitney, 'statsample/test"
},
{
"path": "lib/statsample/vector/gsl.rb",
"chars": 2640,
"preview": "module Statsample\n class Vector\n module GSL_ \n def clear_gsl\n @gsl=nil\n end\n \n def s"
},
{
"path": "lib/statsample/vector.rb",
"chars": 30618,
"preview": "require 'date'\nrequire 'statsample/vector/gsl'\n\nmodule Statsample::VectorShorthands\n # Creates a new Statsample::Vector"
},
{
"path": "lib/statsample/version.rb",
"chars": 42,
"preview": "module Statsample\n VERSION = '1.4.0'\nend\n"
},
{
"path": "lib/statsample.rb",
"chars": 8161,
"preview": "# = statsample.rb - \n# Statsample - Statistic package for Ruby\n# Copyright (C) 2008-2014 Claudio Bustos\n#\n# This progra"
},
{
"path": "po/es/statsample.po",
"chars": 23314,
"preview": "msgid \"\"\nmsgstr \"\"\n\"Project-Id-Version: statsample 1.0.1\\n\"\n\"POT-Creation-Date: 2011-03-03 12:03-0300\\n\"\n\"PO-Revision-Da"
},
{
"path": "po/statsample.pot",
"chars": 19629,
"preview": "# Statsample po template.\n# Copyright (C) 2009-2009 Claudio Bustos\n# This file is distributed under the same license as "
},
{
"path": "references.txt",
"chars": 2733,
"preview": "References\n* Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regre"
},
{
"path": "setup.rb",
"chars": 36162,
"preview": "#\n# setup.rb\n#\n# Copyright (c) 2000-2005 Minero Aoki\n#\n# This program is free software.\n# You can distribute/modify this"
},
{
"path": "test/fixtures/correlation_matrix.rb",
"chars": 738,
"preview": "# Retrieve Correlation matrix for eigth variables\nmodule Statsample\n module Fixtures\n def harman_817\n Matrix[\n "
},
{
"path": "test/fixtures/hartman_23.matrix",
"chars": 540,
"preview": "\"height\" \"arm.span\" \"forearm\" \"lower.leg\" \"weight\" \"bitro.diameter\" \"chest.girth\" \"chest.width\"\n\"height\" 1 0.846 0.805 0"
},
{
"path": "test/fixtures/repeated_fields.csv",
"chars": 246,
"preview": "\"id\",\"name\",\"age\",\"city\",\"a1\",\"name\",\"age\"\n1,\"Alex\",20,\"New York\",\"a,b\",\"a\",3\n2,\"Claude\",23,\"London\",\"b,c\",\"b\",4\n3,\"Pete"
},
{
"path": "test/fixtures/stock_data.csv",
"chars": 2935,
"preview": "17.66\n17.65\n17.68\n17.66\n17.68\n17.67\n17.68\n17.68\n17.67\n17.67\n17.68\n17.71\n17.74\n17.72\n17.73\n17.76\n17.74\n17.69\n17.69\n17.67\n"
},
{
"path": "test/fixtures/test_csv.csv",
"chars": 183,
"preview": "\"id\",\"name\",\"age\",\"city\",\"a1\"\n1,\"Alex\",20,\"New York\",\"a,b\"\n2,\"Claude\",23,\"London\",\"b,c\"\n3,\"Peter\",25,\"London\",\"a\"\n4,\"Fra"
},
{
"path": "test/fixtures/tetmat_matrix.txt",
"chars": 310,
"preview": " 1.0000000 0.1703164 0.2275128 0.1071861 0.0665047\r\n 0.1703164 1.0000000 0.1890911 0.1111471 0.17242"
},
{
"path": "test/fixtures/tetmat_test.txt",
"chars": 12001,
"preview": " 1 1 1 1 1\r\n 1 1 1 1 1\r\n 1 1 1 1 1\r\n 1 1 1 1 2\r\n 1 1 1 1 2\r\n 1 1 1 1 2\r\n 1 1 1 1 2\r\n 1 1 1 1 2\r\n 1 1 1 1 2\r\n 1 1 1 2 1\r\n"
},
{
"path": "test/helpers_tests.rb",
"chars": 2347,
"preview": "$:.unshift(File.expand_path(File.dirname(__FILE__)+'/../lib/'))\n$:.unshift(File.expand_path(File.dirname(__FILE__)+'/'))"
},
{
"path": "test/test_analysis.rb",
"chars": 5946,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\n\nclass StatsampleAnalysisTestCase < MiniTest::Unit"
},
{
"path": "test/test_anova_contrast.rb",
"chars": 1475,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\nclass StatsampleAnovaContrastTestCase < MiniTest::"
},
{
"path": "test/test_anovaoneway.rb",
"chars": 894,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\nclass StatsampleAnovaOneWayTestCase < MiniTest::Un"
},
{
"path": "test/test_anovatwoway.rb",
"chars": 1347,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\nclass StatsampleAnovaTwoWayTestCase < MiniTest::Un"
},
{
"path": "test/test_anovatwowaywithdataset.rb",
"chars": 1900,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\n# Reference:\n# * http://www.uwsp.edu/psych/Stat/13"
},
{
"path": "test/test_anovawithvectors.rb",
"chars": 3874,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\nclass StatsampleAnovaOneWayWithVectorsTestCase < M"
},
{
"path": "test/test_awesome_print_bug.rb",
"chars": 377,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\nclass StatsampleAwesomePrintBug < MiniTest::Test\n "
},
{
"path": "test/test_bartlettsphericity.rb",
"chars": 895,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\n\nclass StatsampleBartlettSphericityTestCase < Mini"
},
{
"path": "test/test_bivariate.rb",
"chars": 7542,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\nclass StatsampleBivariateTestCase < MiniTest::Test"
},
{
"path": "test/test_codification.rb",
"chars": 3083,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\nclass StatsampleCodificationTestCase < MiniTest::U"
},
{
"path": "test/test_crosstab.rb",
"chars": 2155,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\nclass StatsampleCrosstabTestCase < MiniTest::Unit:"
},
{
"path": "test/test_csv.rb",
"chars": 2865,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\nclass StatsampleCSVTestCase < MiniTest::Unit::Test"
},
{
"path": "test/test_dataset.rb",
"chars": 16177,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\nclass StatsampleDatasetTestCase < MiniTest::Unit::"
},
{
"path": "test/test_dominance_analysis.rb",
"chars": 1868,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\nclass StatsampleDominanceAnalysisTestCase < MiniTe"
},
{
"path": "test/test_factor.rb",
"chars": 7645,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\n#require 'rserve'\n#require 'statsample/rserve_exte"
},
{
"path": "test/test_factor_map.rb",
"chars": 1369,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\n#require 'rserve'\n#require 'statsample/rserve_exte"
},
{
"path": "test/test_factor_pa.rb",
"chars": 1790,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\n#require 'rserve'\n#require 'statsample/rserve_exte"
},
{
"path": "test/test_ggobi.rb",
"chars": 1254,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\nrequire 'ostruct'\nclass StatsampleGGobiTestCase < "
},
{
"path": "test/test_gsl.rb",
"chars": 515,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\nclass StatsampleGSLTestCase < MiniTest::Unit::Test"
},
{
"path": "test/test_histogram.rb",
"chars": 3632,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\n\n\nclass StatsampleHistogramTestCase < MiniTest::Un"
},
{
"path": "test/test_matrix.rb",
"chars": 1498,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\n\nclass StatsampleMatrixTestCase < MiniTest::Unit::"
},
{
"path": "test/test_multiset.rb",
"chars": 5779,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\n\n\nclass StatsampleMultisetTestCase < MiniTest::Uni"
},
{
"path": "test/test_regression.rb",
"chars": 9321,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\n\nclass StatsampleRegressionTestCase < MiniTest::Un"
},
{
"path": "test/test_reliability.rb",
"chars": 9287,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\nclass StatsampleReliabilityTestCase < MiniTest::Un"
},
{
"path": "test/test_reliability_icc.rb",
"chars": 6735,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\n\n$reliability_icc=nil\n\nclass StatsampleReliability"
},
{
"path": "test/test_reliability_skillscale.rb",
"chars": 2687,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\n\n\nclass StatsampleReliabilitySkillScaleTestCase < "
},
{
"path": "test/test_resample.rb",
"chars": 579,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\n\nclass StatsampleResampleTestCase < MiniTest::Unit"
},
{
"path": "test/test_rserve_extension.rb",
"chars": 1437,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\nbegin\n require 'rserve'\n require 'statsample/rse"
},
{
"path": "test/test_srs.rb",
"chars": 388,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\n\nclass StatsampleSrsTestCase < MiniTest::Unit::Tes"
},
{
"path": "test/test_statistics.rb",
"chars": 2502,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\nclass StatsampleStatisicsTestCase < MiniTest::Unit"
},
{
"path": "test/test_stest.rb",
"chars": 1708,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\n\nclass StatsampleTestTestCase < MiniTest::Unit::Te"
},
{
"path": "test/test_stratified.rb",
"chars": 413,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\n\nclass StatsampleStratifiedTestCase < MiniTest::Un"
},
{
"path": "test/test_test_f.rb",
"chars": 931,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\nclass StatsampleTestFTestCase < MiniTest::Unit::Te"
},
{
"path": "test/test_test_kolmogorovsmirnov.rb",
"chars": 1451,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\nclass StatsampleTestKolmogorovSmirnovTestCase < Mi"
},
{
"path": "test/test_test_t.rb",
"chars": 2482,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\nclass StatsampleTestTTestCase < MiniTest::Unit::Te"
},
{
"path": "test/test_umannwhitney.rb",
"chars": 927,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\n\nclass StatsampleUMannWhitneyTestCase < MiniTest::"
},
{
"path": "test/test_vector.rb",
"chars": 22415,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\n\nclass StatsampleTestVector < MiniTest::Unit::Test"
},
{
"path": "test/test_wilcoxonsignedrank.rb",
"chars": 2001,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\n\nclass StatsampleUMannWhitneyTestCase < MiniTest::"
},
{
"path": "test/test_xls.rb",
"chars": 1836,
"preview": "require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))\nclass StatsampleExcelTestCase < MiniTest::Unit::Te"
},
{
"path": "web/Rakefile",
"chars": 880,
"preview": "# -*- ruby -*-\nrequire 'rake'\nrequire 'fileutils'\ndirectory \"examples\"\n\ndef get_base(f)\n f.sub(File.dirname(__FILE__)+\""
}
]
// ... and 8 more files (download for full content)
About this extraction
This page contains the full source code of the clbustos/statsample GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 157 files (643.6 KB), approximately 211.3k tokens, and a symbol index with 1478 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — a free GitHub repo-to-text converter for AI. Built by Nikandr Surkov.