Repository: clbustos/statsample
Branch: master
Commit: d5caf4ecf82c
Files: 157
Total size: 643.6 KB

Directory structure:
gitextract_b74amxs6/

├── .gitignore
├── .travis.yml
├── Gemfile
├── History.txt
├── LICENSE.txt
├── Manifest.txt
├── README.md
├── Rakefile
├── benchmarks/
│   ├── correlation_matrix_15_variables.rb
│   ├── correlation_matrix_5_variables.rb
│   ├── correlation_matrix_methods/
│   │   ├── correlation_matrix.ds
│   │   ├── correlation_matrix.html
│   │   ├── correlation_matrix.rb
│   │   ├── correlation_matrix.xls
│   │   ├── correlation_matrix_gsl_ruby.ods
│   │   ├── correlation_matrix_with_graphics.ods
│   │   └── results.ds
│   ├── factor_map.rb
│   └── helpers_benchmark.rb
├── data/
│   └── locale/
│       └── es/
│           └── LC_MESSAGES/
│               └── statsample.mo
├── doc_latex/
│   └── manual/
│       └── equations.tex
├── examples/
│   ├── boxplot.rb
│   ├── correlation_matrix.rb
│   ├── dataset.rb
│   ├── dominance_analysis.rb
│   ├── dominance_analysis_bootstrap.rb
│   ├── histogram.rb
│   ├── icc.rb
│   ├── levene.rb
│   ├── multiple_regression.rb
│   ├── multivariate_correlation.rb
│   ├── parallel_analysis.rb
│   ├── polychoric.rb
│   ├── principal_axis.rb
│   ├── reliability.rb
│   ├── scatterplot.rb
│   ├── t_test.rb
│   ├── tetrachoric.rb
│   ├── u_test.rb
│   ├── vector.rb
│   └── velicer_map_test.rb
├── grab_references.rb
├── lib/
│   ├── spss.rb
│   ├── statsample/
│   │   ├── analysis/
│   │   │   ├── suite.rb
│   │   │   └── suitereportbuilder.rb
│   │   ├── analysis.rb
│   │   ├── anova/
│   │   │   ├── contrast.rb
│   │   │   ├── oneway.rb
│   │   │   └── twoway.rb
│   │   ├── anova.rb
│   │   ├── bivariate/
│   │   │   └── pearson.rb
│   │   ├── bivariate.rb
│   │   ├── codification.rb
│   │   ├── converter/
│   │   │   ├── csv.rb
│   │   │   └── spss.rb
│   │   ├── converters.rb
│   │   ├── crosstab.rb
│   │   ├── dataset.rb
│   │   ├── dominanceanalysis/
│   │   │   └── bootstrap.rb
│   │   ├── dominanceanalysis.rb
│   │   ├── factor/
│   │   │   ├── map.rb
│   │   │   ├── parallelanalysis.rb
│   │   │   ├── pca.rb
│   │   │   ├── principalaxis.rb
│   │   │   └── rotation.rb
│   │   ├── factor.rb
│   │   ├── graph/
│   │   │   ├── boxplot.rb
│   │   │   ├── histogram.rb
│   │   │   └── scatterplot.rb
│   │   ├── graph.rb
│   │   ├── histogram.rb
│   │   ├── matrix.rb
│   │   ├── multiset.rb
│   │   ├── regression/
│   │   │   ├── multiple/
│   │   │   │   ├── alglibengine.rb
│   │   │   │   ├── baseengine.rb
│   │   │   │   ├── gslengine.rb
│   │   │   │   ├── matrixengine.rb
│   │   │   │   └── rubyengine.rb
│   │   │   ├── multiple.rb
│   │   │   └── simple.rb
│   │   ├── regression.rb
│   │   ├── reliability/
│   │   │   ├── icc.rb
│   │   │   ├── multiscaleanalysis.rb
│   │   │   ├── scaleanalysis.rb
│   │   │   └── skillscaleanalysis.rb
│   │   ├── reliability.rb
│   │   ├── resample.rb
│   │   ├── rserve_extension.rb
│   │   ├── shorthand.rb
│   │   ├── srs.rb
│   │   ├── test/
│   │   │   ├── bartlettsphericity.rb
│   │   │   ├── chisquare.rb
│   │   │   ├── f.rb
│   │   │   ├── kolmogorovsmirnov.rb
│   │   │   ├── levene.rb
│   │   │   ├── t.rb
│   │   │   ├── umannwhitney.rb
│   │   │   └── wilcoxonsignedrank.rb
│   │   ├── test.rb
│   │   ├── vector/
│   │   │   └── gsl.rb
│   │   ├── vector.rb
│   │   └── version.rb
│   └── statsample.rb
├── po/
│   ├── es/
│   │   ├── statsample.mo
│   │   └── statsample.po
│   └── statsample.pot
├── references.txt
├── setup.rb
├── test/
│   ├── fixtures/
│   │   ├── correlation_matrix.rb
│   │   ├── hartman_23.matrix
│   │   ├── repeated_fields.csv
│   │   ├── stock_data.csv
│   │   ├── test_csv.csv
│   │   ├── test_xls.xls
│   │   ├── tetmat_matrix.txt
│   │   └── tetmat_test.txt
│   ├── helpers_tests.rb
│   ├── test_analysis.rb
│   ├── test_anova_contrast.rb
│   ├── test_anovaoneway.rb
│   ├── test_anovatwoway.rb
│   ├── test_anovatwowaywithdataset.rb
│   ├── test_anovawithvectors.rb
│   ├── test_awesome_print_bug.rb
│   ├── test_bartlettsphericity.rb
│   ├── test_bivariate.rb
│   ├── test_codification.rb
│   ├── test_crosstab.rb
│   ├── test_csv.rb
│   ├── test_dataset.rb
│   ├── test_dominance_analysis.rb
│   ├── test_factor.rb
│   ├── test_factor_map.rb
│   ├── test_factor_pa.rb
│   ├── test_ggobi.rb
│   ├── test_gsl.rb
│   ├── test_histogram.rb
│   ├── test_matrix.rb
│   ├── test_multiset.rb
│   ├── test_regression.rb
│   ├── test_reliability.rb
│   ├── test_reliability_icc.rb
│   ├── test_reliability_skillscale.rb
│   ├── test_resample.rb
│   ├── test_rserve_extension.rb
│   ├── test_srs.rb
│   ├── test_statistics.rb
│   ├── test_stest.rb
│   ├── test_stratified.rb
│   ├── test_test_f.rb
│   ├── test_test_kolmogorovsmirnov.rb
│   ├── test_test_t.rb
│   ├── test_umannwhitney.rb
│   ├── test_vector.rb
│   ├── test_wilcoxonsignedrank.rb
│   └── test_xls.rb
└── web/
    └── Rakefile

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
doc.yaml
*.swp
*.rbc
coverage
*~
agregar_adsense_a_doc.rb
pkg
doc
.yardoc
examples/images/*
examples/*.html
web/upload_task.rb
.idea


================================================
FILE: .travis.yml
================================================
language:
  ruby

rvm:
  - '1.9.3'
  - '2.0.0'
  - '2.1.1'

script:
  bundle exec rake test
  
before_install:
  - sudo apt-get update -qq
  - sudo apt-get install -y libgsl0-dev r-base r-base-dev
  - sudo Rscript -e "install.packages(c('Rserve','irr'),,'http://cran.us.r-project.org')"


================================================
FILE: Gemfile
================================================
source "https://www.rubygems.org"
gem 'minitest'
gem 'rdoc'
gem 'mocha', '0.14.0' #:require=>'mocha/setup'
gem 'shoulda','3.5.0'
gem 'shoulda-matchers','2.2.0'
gem 'hoe'
#gem 'bio-statsample-timeseries'
gem 'reportbuilder'
gem 'dirty-memoize'
gem 'distribution'
gem 'extendmatrix'
gem 'minimization'
gem 'rserve-client'
gem 'rubyvis'
gem 'spreadsheet'
gem 'rb-gsl'
gem 'awesome_print'


================================================
FILE: History.txt
================================================
=== 1.4.0 / 2014-10-11
  * Replaced README.txt with README.md
  * Replaced File.exists? with File.exist?
  + New Dataset.join to join two datasets based on some fields
  * Deleted MLE-based regression (Probit and logistic). All GLM methods are now in statsample-glm
  
=== 1.3.1 / 2014-06-26

  * Example referred to a SimpleRegression class which doesn't exist. Updated to working example.
  * Merge pull request #15 from Blahah/patch-1
  * Updated Gemfile
  * Updated README.txt for v1.3.0
  * Updated to ruby 2.1.0

=== 1.3.0 / 2013-09-19

  * Merge remote-tracking branch 'vpereira/master' into vpereira
  * New Wilcoxon Signed Rank test
  * Removed TimeSeries class. It is now available in the gem "bio-statsample-timeseries" [GSOC 2013 project :) ]
  * Update shoulda support
  * added Bundle depds
  * improved the csv read method (requires tests)
  * open svg on mac osx

=== 1.2.0 / 2011-12-15
  
  * Added support for time series (TimeSeries object): MA, EMA, MACD, acf, lag and delta. [Rob Britton]
  * Changed summary attribute to properly display 'b' value for simple linear regression [hstove]
  * Merge pull request #6 from hstove/patch-1: Changed summary attribute to properly display 'b' value for simple linear regression [Claudio Bustos]
  * fix example code for CovariateMatrix [James Kebinger]

=== 1.1.0 / 2011-06-02

* New Statsample::Anova::Contrast
* Jacknife and bootstrap for Vector. Thanks to John Firebaugh for the idea
* Improved Statsample::Analysis API
* Updated CSV.read. Third argument is a Hash with options to CSV class  
* Added restriction on Statsample::Excel.read
* Updated spanish po 
* Better summary for Vector
* Improving summary of t related test (confidence interval and estimate output)
* Replaced c for vector on Statsample::Analysis examples
* Added Vector#median_absolute_deviation
* First implementation of Kolmogorov Smirnov test. Returns correct D value, but without Kolmogorov distribution isn't very useful.

=== 1.0.1 / 2011-01-28

* Updated spanish po.
* Updated distribution gem dependency. On Ruby 1.8.7, distribution 0.2.0 raises an error.

=== 1.0.0 / 2011-01-27

* Added Statsample::Analysis, a beautiful DSL to perform fast statistical analysis using statsample. See directory /examples
* Created benchmarks directory 
* Removed Distribution module from statsample and moved to a gem. Changes on code to reflect new API
* Optimized simple regression.  Better library detection
* New 'should_with_gsl' to test methods with gsl. Refactored Factor::MAP
* Almost complete GSL cleanup on Vector
* Updated some doc on Vector
* Used GSL::Matrix on Factor classes when available
* SkillScaleAnalysis doesn't crash with one or more vectors with 0 variance
* Modified examples using Statsample::Analysis
* Simplified eigen calculations
* Updated some examples. Added correlation matrix speed suite
* Correlation matrix optimized. Better specs 
* Optimized correlation matrix. Use gsl matrix algebra or pairwise correlations depending on empiric calculated equations. See benchmarks/correlation_matrix.rb to see implementation of calculation 
* Moved tests fixtures from data to test/fixtures
* Fixed some errors on tests
* Bug fix: constant_se on binomial regression have an error 
* All test should work on ruby 1.9.3 
* New Vector.[] and Vector.new_scale
* Detect linearly dependent predictors on OLS. 

=== 0.18.0 / 2011-01-07
* New Statsample.load_excel 
* New Statsample.load_csv
* Statsample::Dataset#[] accepts an array of fields and uses clone
* New Dataset#correlation_matrix  and Statsample::Dataset#covariance_matrix
* Statsample::Dataset.filter add labels to vectors
* Principal Components generation complete on PCA (covariance matrix preferred)
* Added note on Statsample::Factor::PCA about erratic signs on eigenvalues,
* Statsample::Factor::PCA.component_matrix calculated different for covariance matrix
* Improved summary for PCA using covariance matrix
* New attribute :label_angle for Statsample::Graph::Boxplot
* Fixed Scatterplots scaling problems
* New attributes for Scatterplots: groups, minimum_x, minimum_y, maximum_x, 
* New Statsample::Multiset#union allows to create a new dataset based on a m
* New Statsample::Multiset#each to traverse through datasets
* Bug fix: Vector#standarized and Vector#percentile crash on nil data
* Bug fix: Vector#mean and Vector#sd crash on data without valid values
* Modified methods names on Statsample::Factor::PCA : feature_vector to feature_matrix, data_transformation to principal_components
* Added Statsample::Vector.vector_centered
* Factor::MAP.with_dataset() implemented 
* Bug fix: Factor::MAP with correlation matrix with non-real eigenvalues crashes
* Added documentation for Graph::Histogram
* Added MPA to Reliability::MultiScaleAnalysis
* Added custom names for returned vectors and datasets 
* Updated Spanish translation
* Graph::Histogram updated. Custom x and y max and min, optional normal distribution drawing 
* Updated Histogram class, with several new methods compatible with GSL::Histogram

=== 0.17.0 / 2010-12-09
* Added Statsample::Graph::Histogram and Statsample::Graph::Boxplot
* Added Statsample::Reliability::SkillScaleAnalysis for analysis of skill based scales.
* Deleted combination and permutation classes. Backport for ruby 1.8.7 widely available
* Deleted unused variables (thanks, ruby-head)

=== 0.16.0 / 2010-11-13
* Works on ruby 1.9.2 and HEAD. Updated Rakefile and manifest
* Removed all graph based on Svg::Graph. 
* First operative version of Graph with Rubyvis
* Corrected bug on Distribution::Normal.cdf. 
* Added reference on references.txt
* Ruby-based random gaussian distribution generator when gsl not available
* Added population average deviation [Al Chou]

=== 0.15.1 / 2010-10-20
* Statsample::Excel and Statsample::PlainText add name to vectors equal to field name 
* Statsample::Dataset.delete_vector accept multiple fields.
* Statsample::Dataset.dup_only_valid allows duplication of specific fields 
* ScaleAnalysis doesn't crash on one-item scales 
* Updated references

=== 0.15.0 / 2010-09-07
* Added class Statsample::Reliability::ICC for calculation of Intra-class correlation (Shrout & Fleiss, 1979; McGraw & Wong, 1996). Tested with SPSS and R values.
* References: Updated and standardized references on many classes. Added grab_references.rb script, to create a list of references for the library
* Added Spearman-Brown prophecy on Reliability module
* Distribution::F uses Gsl when available
* Added mean r.p.b. and item sd on Scale Analysis
* Corrected bug on Vector.ary_method and example of Anova Two Way using vector.  


=== 0.14.1 / 2010-08-18

* Added extra information on $DEBUG=true. 
* Changed ParallelAnalysis: with_random_data parameters, bootstrap_method options are data and random, resolve bug related to number of factors to preserve, resolved bug related to original eigenvalues, can support failed bootstrap of data for Tetrachoric  correlation. 
* Optimized eigenpairs on Matrix when GSL is available. 
* Added test for parallel analysis using data bootstrapping
* Updated .pot and Manifest.txt
* Added test for kmo(global and univariate), bartlett and anti-image. Kmo and Bartlett have test based on Dziuban and Shirkey with correct results
* Complete set of test to test if a correlation matrix is appropriate for factor analysis: test of sphericity, KMO and anti-image (see Dziuban and Shirkey, 1974)
* Updated Parallel Analysis to work on Principal Axis Analysis based on O'Connor's formulae
* Added reference for Statsample::Factor::MAP

=== 0.14.0 / 2010-08-16
* Added Statsample::Factor::MAP, to execute Velicer's (1976) MAP to determine the number of factors to retain on EFA 
* Bug fix on test suite on Ruby 1.8.7
* Horn's Parallel Analysis operational and tested for pure random data
* Fixed bug on Excel writer on Ruby1.9 (frozen string on header raises an error). 
* Extra information on Factorial Analysis on summaries
* Fixed bug on Factor::Rotation when used ::Matrix without field method. 
* Added Vector#vector_percentil method
* Summaries for PCA, Rotation, MultiScale and ScaleAnalysis created or improved. 
* Factor::PCA could have rotation and parallel analysis on summary.
* Cronbach's alpha from covariance matrix raise an error on size<2 
* MultiScaleAnalysis could have Parallel Analysis on summary.
* Added Chi Square test
* Added new information on README.txt

=== 0.13.1 / 2010-07-03

* Rserve extensions for dataset and vector operational
* On x86_64, variance from gsl is not exactly equal to sum of variance-covariance on Statsample::Reliability::Scale, but in delta 1e-10
* Updated README.txt
* Reliability::ScaleAnalysis uses covariance matrix for 'if deleted' calculations to optimize memory and speed. Test for 'if deleted' statistics
* More string translated. Added dependency on tetrachoric on parallel analysis

=== 0.13.0 / 2010-06-13

* Polychoric and Tetrachoric moved to gem statsample-bivariate-extension
* All classes left with summary method include Summarizable now. Every method which return localizable string is now parsed with _()
* Correct implementation of Reliability::MultiScaleAnalysis. 
* Spanish translation for Mann-Whitney's U
* Added example for Mann-Whitney's U test
* Better summary for Mann-Whitney's U Test
* Added Statsample::Bivariate::Pearson class to retrieve complete analysis for r correlations
* Bug fix on DominanceAnalysis::Bootstrap

=== 0.12.0 / 2010-06-09

* Modified Rakefile to remove dependencies based on C extensions. These are moved to statsample-optimization
* T test with unequal variance fixed on i686
* API Change: Renamed Reliability::ItemAnalysis and moved to independent file 
* New Reliability::MultiScaleAnalysis for easy analysis of scales of the same survey, including reliability, correlation matrix and Factor Analysis
* Updated README to reflect changes on Reliability module
* SvgGraph works with reportbuilder. 
* Added methods on Polychoric based on Olsson(1979): the idea is estimate using second derivatives.
* Distribution test changed (reduced precision on 32-bit systems)

=== 0.11.2 / 2010-05-05
* Updated dependency for 'extendmatrix' to 0.2 (Matrix#build method)

=== 0.11.1 / 2010-05-04
* Removed almost all Matrix extensions and replaced them with a dependency on the 'extendmatrix' gem
* Added dependency to gsl >=1.12.109. Polychoric with joint method fails without this explicit dependency
=== 0.11.0 / 2010-04-16
<b>New features:</b>
* Added Statsample::Anova::TwoWay and Statsample::Anova::TwoWayWithVectors
* Added Statsample.clone_only valid and Statsample::Dataset.clone_only_valid, for cheap copy on already clean vectors
<b>Optimizations and bug fix</b> 
* Removed library statistics2 from package. Used gem statistics2 instead, because it has an extension version
* Added example for Reliability class
* Bug fix on Statsample::DominanceAnalysis

=== 0.10.0 / 2010-04-13

<b>API modifications</b>
* Refactoring of Statsample::Anova module. 
  * Statsample::Anova::OneWay :implementation of generic ANOVA One-Way, used by Multiple Regression, for example.
  * Statsample::Anova::OneWayWithVectors: implementation of ANOVA One-Way to test differences of means.

<b>New features</b>
* New Statsample::Factor::Parallel Analysis, to perform Horn's 'parallel analysis' on a PCA, to adjust for sample bias on retention of components.
* New Statsample.only_valid_clone and Statsample::Dataset.clone, which allow creating shallow copies of valid vectors and datasets. Used by correlation matrix methods to optimize calculations
* New module Statsample::Summarizable, which adds GetText and ReportBuilder support to classes. Better summaries for Vector, Dataset, Crosstab, PrincipalAxis, PCA and Regression::Multiple classes

<b>Optimizations and bug fix</b>

* Refactoring of Statsample::Regression::Multiple classes. Still needs works
* Bug fix on Statsample::Factor::PCA and Statsample::Factor::PrincipalAxis
* Bug fix on Statsample::Bivariate::Polychoric.new_with_vectors. It should be defined as a class method, not an instance method.
* Optimized correlation and covariance matrix. Only calculates the half of matrix and the other half is returned from cache
* More tests coverage. RCOV Total: 82.51% , Code: 77.83%

=== 0.9.0 / 2010-04-04
* New Statsample::Test::F. Anova::OneWay subclasses it and Regression classes uses it.
=== 0.8.2 / 2010-04-01
* Statsample::PromiseAfter replaced by external package DirtyMemoize [http://rubygems.org/gems/dirty-memoize]
=== 0.8.1 / 2010-03-29
* Fixed Regression summaries
=== 0.8.0 / 2010-03-29
* New Statsample::Test::T module, with classes and methods to do Student's t tests for one and two samples. 
* Statsample::PromiseAfter module to set a number of variables without explicitly call the compute or iterate method
* All tests ported to MiniUnit
* Directory 'demo' renamed to 'examples'
* Bug fix on report_building on Statsample::Regression::Multiple classes

=== 0.7.0 / 2010-03-25
* Ported to ReportBuilder 1.x series
* Implementation of ruby based covariance and correlation changed to a clearer code
* Statsample::Vector#svggraph_frequencies accepts IO
* Some test ported to Miniunit
* CSV on Ruby1.8 uses FasterCSV

=== 0.6.7 / 2010-03-23
* Bug fix: dependency on ReportBuilder should be set to "~>0.2.0", not "0.2"
=== 0.6.6 / 2010-03-22
* Set ReportBuilder dependency to '0.2.~' version, because future API break
* Removed Alglib dependency
* Factor::PrincipalAxis and Factor::PCA reworked
* Standardization of documentation on almost every file
* New Statsample::Test::Levene, to test equality of variances
* Constant HAS_GSL replaced by Statsample.has_gsl?
* PCA and Principal Axis test based on R and SPSS results
* Bug fix on test_dataset.rb / test_saveload
* Added Rakefile
* Demos for levene, Principal Axis

=== 0.6.5 / 2010-02-24

* Bug fix on test: Use tempfile instead of tempdir
* Multiple Regression: Calculation of constant standard error , using covariance matrix.
* Calculation of R^2_yx and P^2_yx for Regression on Multiple Dependent variables
* Dominance Analysis could use Correlation or Covariance Matrix as input.
* Dominance Analysis extension to multiple dependent variables (Azen & Budescu, 2006)
* Two-step estimate of Polychoric correlation uses minimization gem, so could be executed without rb-gsl


=== 0.6.4 / 2010-02-19
* Dominance Analysis and Dominance Analysis Bootstrap allows multivariate dependent analysis. 
* Test suite for Dominance Analysis, using Azen and Budescu papers as references
* X^2 for polychoric correlation

=== 0.6.3 / 2010-02-15
* Statsample::Bivariate::Polychoric have joint estimation.
* Some extra documentation and bug fixes

=== 0.6.2 / 2010-02-11
* New Statsample::Bivariate::Polychoric. For implement: X2 and G2
* New matrix.rb, for faster development of Contingency Tables and Correlation Matrix

=== 0.6.1 / 2010-02-08
* Bug fix on DominanceAnalysis summary for Ruby1.9
* Some extra documentation
=== 0.6.0 / 2010-02-05
* New Statsample::Factor module. Include classes for extracting factors (Statsample::Factor::PCA and  Statsample::Factor::PrincipalAxis) and rotate component matrix  ( Statsample::Factor::Rotation subclasses). For now, only orthogonal rotations
* New Statsample::Dataset.crosstab_with_asignation, Statsample::Dataset.one_to_many
* New class Statsample::Permutation to produce permutations of a given array
* New class Statsample::Histogram, with same interface as GSL one 
* New class Statsample::Test::UMannWhitney, to perform Mann-Whitney's U test. Gives z based and exact calculation of probability
* Improved support for ReportBuilder
* Statsample::Codification module reworked
* Fixed bugs on Dominance Analysis classes
* Fixed bugs on Statsample::Vector.kurtosis and Statsample::Vector.skew 

=== 0.5.1 / 2009-10-06

* New class Statsample::Bivariate::Tetrachoric, for calculation of tetrachoric correlations. See http://www.john-uebersax.com/stat/tetra.htm for information.
* New Statsample::Dataset.merge
* New Statsample::Vector.dichotomize
* New ItemReliability.item_difficulty_analysis
* New module Statsample::SPSS, to export information to SPSS. For now, only tetrachoric correlation matrix are provided
* All SpreadSheet based importers now accept repeated variable names and rename them on the fly
* MultipleRegression::BaseEngine moved to new file
* Bug fix for MultipleRegression::GslEngine checks for Alglib, not GSL

=== 0.5.0 / 2009-09-26
* Vector now uses a Hash as a third argument
* Tested on Ruby 1.8.6, 1.8.7 and 1.9.1 with multiruby

=== 0.4.1 / 2009-09-12
* More methods and usage documentation
* Logit tests
* Bug fix: rescue for requires doesn't specify LoadError
* Binomial::BaseEngine new methods: coeffs_se, coeffs, constant and constant_se

=== 0.4.0 / 2009-09-10
* New Distribution module, based on statistics2.rb by Shin-ichiro HARA. Replaces all instances of GSL distributions pdf and cdf calculations for native calculation.
* New Maximum Likelihood Estimation for Logit, Probit and Normal Distribution using Von Tessin(2005) algorithm. See MLE class and subclasses for more information.
* New Binomial regression subclasses (Logit and Probit), using MLE class
* Added tests for gsl, Distribution, MLE and Logit
* Bug fix on svggraph.rb. Added check_type for scale graphics
* Bug fix on gdchart. Replaced old Nominal, Ordinal and Scale for Vector

=== 0.3.4 / 2009-08-21
* Works with statsample-optimization 2.0.0
* Vector doesn't use delegation. All methods are part of Vector
* Added Combination. Generates all combination of n elements taken r at a time
* Bivariate#prop_pearson can now use as a second parameter :both, :left, :right, :positive or :negative
* Added LICENSE.txt

=== 0.3.3 / 2009-08-11
* Added i18n support. For now, only spanish translation available
* Bug fix: Test now load libraries on ../lib path
* Excel and CSV importers automatically modify type of vector to Scale when all data are numbers or nils values

=== 0.3.2 / 2009-08-04

* Added Regression::Multiple::GslEngine
* Added setup.rb
* Crosstab#row_label and #column_name
* DominanceAnalysis and DominanceAnalysisBootstrap uses Dataset#labels for Vector names.

=== 0.3.1 / 2009-08-03

* Name and logic of Regression classes changed. Now, you have Regression::Simple class and Regression::Multiple module with two engines: RubyEngine and AlglibEngine
* New Crosstab#summary

=== 0.3.0 / 2009-08-02

* Statsample renamed to Statsample
* Optimization extension goes to another gem: ruby-statsample-optimization

=== 0.2.0 / 2009-08-01

* One Way Anova on Statsample::Anova::OneWay
* Dominance Analysis!!!! The one and only reason to develop a Multiple Regression on pure ruby.
* Multiple Regression on Multiple Regression module. Pairwise (pure ruby) or MultipleRegressionPairwise and Listwise (optimized) on MultipleRegressionAlglib and 
* New Dataset#to_gsl_matrix, #from_to,#[..],#bootstrap,#vector_missing_values, #vector_count_characters, #each_with_index, #collect_with_index
* New Vector#box_cox_transformation
* Module Correlation renamed to Bivariate
* Some fancy methods and classes to create Summaries
* Some documentation about Algorithm used on doc_latex
* Deleted 'distributions' extension. Ruby/GSL has all the pdf and cdf you ever need.
* Tests work without any dependency. Only nags about missing deps. 
* Test for MultipleRegression, Anova, Excel, Bivariate.correlation_matrix and many others 

=== 0.1.9 / 2009-05-22

* Class Vector: new method vector_standarized_pop, []=, min,max
* Class Dataset: global variable $RUBY_SS_ROW stores the row number on each() and related methods. dup() with argument returns a copy of the dataset only for given fields. New methods: standarize, vector_mean, collect, verify,collect_matrix
* Module Correlation: new methods covariance, t_pearson, t_r, prop_pearson, covariance_matrix, correlation_matrix, correlation_probability_matrix
* Module SRS: New methods estimation_n0 and estimation_n
* Module Reliability: new ItemCharacteristicCurve class
* New HtmlReport class
* New experimental SPSS Class. 
* Converters: Module CSV with new options. Added write() method for GGobi module 
* New Mx exporter (http://www.vcu.edu/mx/)
* Class SimpleRegression: new methods standard error

* Added tests for regression and reliability, Vector#vector_mean, Dataset#dup (partial)  and Dataset#verify


=== 0.1.8 / 2008-12-10
* Added Regression and Reliability modules
* Class Vector: added methods vector_standarized, recode, inspect, ranked
* Class Dataset: added methods vector_by_calculation, vector_sum, filter_field
* Module Correlation: added methods like spearman, point biserial and tau-b
* Added tests for Vector#ranked, Vector#vector_standarized,  Vector#sum_of_squared_deviation, Dataset#vector_by_calculation, Dataset#vector_sum, Dataset#filter_field and various test for Correlation module
* Added demos: item_analysis and sample_test

=== 0.1.7 / 2008-10-1
* New module for codification
* ...
=== 0.1.6 / 2008-09-26
* New modules for SRS and stratified sampling
* Statsample::Database for read and write onto databases.
  You could use Database and CSV on-tandem for mass-editing and reimport
  of databases

=== 0.1.5 / 2008-08-29
* New extension statsampleopt for optimizing some functions on Statsample submodules
* New submodules Correlation and Test

=== 0.1.4 / 2008-08-27

* New extension, with cdf functions for
  chi-square, t, gamma and normal distributions. 
  Based on dcdflib (http://www.netlib.org/random/)
  Also, has a function to calculate the tail for a noncentral T distribution

=== 0.1.3 / 2008-08-22

* Operational versions of Vector, Dataset, Crosstab and Resample
* Read and write CSV files
* Calculate chi-square for 2 matrixes

=== 0.1.1 - 0.1.2 / 2008-08-18

* Included several methods on Ruby::Type classes
* Organized dirs with sow


=== 0.1.0 / 2008-08-12

* First version.


================================================
FILE: LICENSE.txt
================================================
Copyright (c) 2009-2014, Claudio Bustos
All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: Manifest.txt
================================================
.travis.yml
Gemfile
Gemfile.lock
History.txt
LICENSE.txt
Manifest.txt
README.md
Rakefile
benchmarks/correlation_matrix_15_variables.rb
benchmarks/correlation_matrix_5_variables.rb
benchmarks/correlation_matrix_methods/correlation_matrix.ds
benchmarks/correlation_matrix_methods/correlation_matrix.html
benchmarks/correlation_matrix_methods/correlation_matrix.rb
benchmarks/correlation_matrix_methods/correlation_matrix.xls
benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods
benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods
benchmarks/correlation_matrix_methods/results.ds
benchmarks/factor_map.rb
benchmarks/helpers_benchmark.rb
data/locale/es/LC_MESSAGES/statsample.mo
doc_latex/manual/equations.tex
examples/boxplot.rb
examples/correlation_matrix.rb
examples/dataset.rb
examples/dominance_analysis.rb
examples/dominance_analysis_bootstrap.rb
examples/histogram.rb
examples/icc.rb
examples/levene.rb
examples/multiple_regression.rb
examples/multivariate_correlation.rb
examples/parallel_analysis.rb
examples/polychoric.rb
examples/principal_axis.rb
examples/reliability.rb
examples/scatterplot.rb
examples/t_test.rb
examples/tetrachoric.rb
examples/u_test.rb
examples/vector.rb
examples/velicer_map_test.rb
grab_references.rb
lib/spss.rb
lib/statsample.rb
lib/statsample/analysis.rb
lib/statsample/analysis/suite.rb
lib/statsample/analysis/suitereportbuilder.rb
lib/statsample/anova.rb
lib/statsample/anova/contrast.rb
lib/statsample/anova/oneway.rb
lib/statsample/anova/twoway.rb
lib/statsample/bivariate.rb
lib/statsample/bivariate/pearson.rb
lib/statsample/codification.rb
lib/statsample/converter/csv.rb
lib/statsample/converter/spss.rb
lib/statsample/converters.rb
lib/statsample/crosstab.rb
lib/statsample/dataset.rb
lib/statsample/dominanceanalysis.rb
lib/statsample/dominanceanalysis/bootstrap.rb
lib/statsample/factor.rb
lib/statsample/factor/map.rb
lib/statsample/factor/parallelanalysis.rb
lib/statsample/factor/pca.rb
lib/statsample/factor/principalaxis.rb
lib/statsample/factor/rotation.rb
lib/statsample/graph.rb
lib/statsample/graph/boxplot.rb
lib/statsample/graph/histogram.rb
lib/statsample/graph/scatterplot.rb
lib/statsample/histogram.rb
lib/statsample/matrix.rb
lib/statsample/multiset.rb
lib/statsample/regression.rb
lib/statsample/regression/multiple.rb
lib/statsample/regression/multiple/alglibengine.rb
lib/statsample/regression/multiple/baseengine.rb
lib/statsample/regression/multiple/gslengine.rb
lib/statsample/regression/multiple/matrixengine.rb
lib/statsample/regression/multiple/rubyengine.rb
lib/statsample/regression/simple.rb
lib/statsample/reliability.rb
lib/statsample/reliability/icc.rb
lib/statsample/reliability/multiscaleanalysis.rb
lib/statsample/reliability/scaleanalysis.rb
lib/statsample/reliability/skillscaleanalysis.rb
lib/statsample/resample.rb
lib/statsample/rserve_extension.rb
lib/statsample/shorthand.rb
lib/statsample/srs.rb
lib/statsample/test.rb
lib/statsample/test/bartlettsphericity.rb
lib/statsample/test/chisquare.rb
lib/statsample/test/f.rb
lib/statsample/test/kolmogorovsmirnov.rb
lib/statsample/test/levene.rb
lib/statsample/test/t.rb
lib/statsample/test/umannwhitney.rb
lib/statsample/test/wilcoxonsignedrank.rb
lib/statsample/vector.rb
lib/statsample/vector/gsl.rb
lib/statsample/version.rb
po/es/statsample.mo
po/es/statsample.po
po/statsample.pot
references.txt
setup.rb
test/fixtures/bank2.dat
test/fixtures/correlation_matrix.rb
test/fixtures/hartman_23.matrix
test/fixtures/repeated_fields.csv
test/fixtures/stock_data.csv
test/fixtures/test_csv.csv
test/fixtures/test_xls.xls
test/fixtures/tetmat_matrix.txt
test/fixtures/tetmat_test.txt
test/helpers_tests.rb
test/test_analysis.rb
test/test_anova_contrast.rb
test/test_anovaoneway.rb
test/test_anovatwoway.rb
test/test_anovatwowaywithdataset.rb
test/test_anovawithvectors.rb
test/test_bartlettsphericity.rb
test/test_bivariate.rb
test/test_codification.rb
test/test_crosstab.rb
test/test_csv.rb
test/test_dataset.rb
test/test_dominance_analysis.rb
test/test_factor.rb
test/test_factor_map.rb
test/test_factor_pa.rb
test/test_ggobi.rb
test/test_gsl.rb
test/test_histogram.rb
test/test_matrix.rb
test/test_multiset.rb
test/test_regression.rb
test/test_reliability.rb
test/test_reliability_icc.rb
test/test_reliability_skillscale.rb
test/test_resample.rb
test/test_rserve_extension.rb
test/test_srs.rb
test/test_statistics.rb
test/test_stest.rb
test/test_stratified.rb
test/test_test_f.rb
test/test_test_kolmogorovsmirnov.rb
test/test_test_t.rb
test/test_umannwhitney.rb
test/test_vector.rb
test/test_wilcoxonsignedrank.rb
test/test_xls.rb
web/Rakefile


================================================
FILE: README.md
================================================
# Statsample

Homepage :: https://github.com/sciruby/statsample

[![Build Status](https://travis-ci.org/clbustos/statsample.svg?branch=master)](https://travis-ci.org/clbustos/statsample)
[![Gem Version](https://badge.fury.io/rb/statsample.svg)](http://badge.fury.io/rb/statsample)
## DESCRIPTION

A suite for basic and advanced statistics on Ruby. Tested on Ruby 2.1.1p76 (June 2014), 1.8.7, 1.9.1, 1.9.2 (April, 2010), ruby-head(June, 2011) and JRuby 1.4 (Ruby 1.8.7 compatible).

Include:
* Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
* Imports and exports datasets from and to Excel, CSV and plain text files.
* Correlations: Pearson's r, Spearman's rank correlation (rho), point biserial, tau a, tau b and gamma. Tetrachoric and Polychoric correlations are provided by the `statsample-bivariate-extension` gem.
* Intra-class correlation
* Anova: generic and vector-based One-way ANOVA and Two-way ANOVA, with contrasts for One-way ANOVA.
* Tests: F, T, Levene, U-Mannwhitney.
* Regression: Simple, Multiple (OLS), Probit  and Logit
* Factorial Analysis: Extraction (PCA and Principal Axis), Rotation (Varimax, Equimax, Quartimax) and Parallel Analysis and Velicer's MAP test, for estimation of number of factors.
* Reliability analysis for simple scale and a DSL to easily analyze multiple scales using factor analysis and correlations, if you want it.
* Basic time series support
* Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
* Sample calculation related formulas
* Structural Equation Modeling (SEM), using R libraries `sem` and `OpenMx`
* Creates reports on text, html and rtf, using ReportBuilder gem
* Graphics: Histogram, Boxplot and Scatterplot

## Principles

* Software Design: 
  * One module/class for each type of analysis
  * Options can be set as hash on initialize() or as setters methods
  * Clean API for interactive sessions
  * summary() returns all necessary information for interactive sessions
  * All statistical data available through methods on objects
  * All (important) methods should be tested. Better with random data.
* Statistical Design
  * Results are tested against text results, SPSS and R outputs.
  * Go beyond Null Hypothesis Testing, using confidence intervals and effect sizes when possible
  * (When possible) All references for methods are documented, providing sensible information on documentation 

## Features

* Classes for manipulation and storage of data:
  * Statsample::Vector: An extension of an array, with statistical methods like sum, mean and standard deviation
  * Statsample::Dataset: a group of Statsample::Vector, analog to a excel spreadsheet or a dataframe on R. The base of almost all operations on statsample. 
  * Statsample::Multiset: multiple datasets with same fields and type of vectors
* Anova module provides generic Statsample::Anova::OneWay and vector based Statsample::Anova::OneWayWithVectors. Also you can create contrast using Statsample::Anova::Contrast
* Module Statsample::Bivariate provides covariance and pearson, spearman, point biserial, tau a, tau b, gamma, tetrachoric (see Bivariate::Tetrachoric) and polychoric (see Bivariate::Polychoric) correlations. Include methods to create correlation and covariance matrices
* Multiple types of regression.
  * Simple Regression :  Statsample::Regression::Simple
  * Multiple Regression: Statsample::Regression::Multiple
  * Logit Regression:    Statsample::Regression::Binomial::Logit
  * Probit Regression:    Statsample::Regression::Binomial::Probit
* Factorial Analysis algorithms on Statsample::Factor module.
  * Classes for Extraction of factors: 
    * Statsample::Factor::PCA
    * Statsample::Factor::PrincipalAxis
  * Classes for Rotation of factors: 
    * Statsample::Factor::Varimax
    * Statsample::Factor::Equimax
    * Statsample::Factor::Quartimax
  * Classes for calculation of factors to retain
    * Statsample::Factor::ParallelAnalysis performs Horn's 'parallel analysis' to a principal components analysis to adjust for sample bias in the retention of components.
    * Statsample::Factor::MAP performs Velicer's Minimum Average Partial (MAP) test, which retain components as long as the variance in the correlation matrix represents systematic variance.
* Dominance Analysis. Based on Budescu and Azen papers, dominance analysis is a method to analyze the relative importance of one predictor relative to another on multiple regression
  * Statsample::DominanceAnalysis class can report dominance analysis for a sample, using uni or multivariate dependent variables
  * Statsample::DominanceAnalysis::Bootstrap can execute bootstrap analysis to determine dominance stability, as recommended by [Azen & Budescu (2003)](http://psycnet.apa.org/journals/met/8/2/129/).
* Module Statsample::Codification, to help to codify open questions
* Converters to import and export data:
  * Statsample::Database : Can create sql to create tables, read and insert data
  * Statsample::CSV : Read and write CSV files
  * Statsample::Excel : Read and write Excel files
  * Statsample::Mx    : Write Mx Files
  * Statsample::GGobi : Write Ggobi files
* Module Statsample::Crosstab provides function to create crosstab for categorical data
* Module Statsample::Reliability provides functions to analyze scales with psychometric methods. 
  * Class Statsample::Reliability::ScaleAnalysis provides statistics like mean and standard deviation for a scale, Cronbach's alpha and standardized Cronbach's alpha, and for each item: mean, correlation with total scale, mean if deleted, Cronbach's alpha if deleted.
  * Class Statsample::Reliability::MultiScaleAnalysis provides a DSL to easily analyze reliability of multiple scales and retrieve correlation matrix and factor analysis of them.
  * Class Statsample::Reliability::ICC provides intra-class correlation, using Shrout & Fleiss(1979) and McGraw & Wong (1996) formulations.
* Module Statsample::SRS (Simple Random Sampling) provides a lot of functions to estimate standard error for several type of samples
* Module Statsample::Test provides several methods and classes to perform inferential statistics
  * Statsample::Test::BartlettSphericity
  * Statsample::Test::ChiSquare
  * Statsample::Test::F
  * Statsample::Test::KolmogorovSmirnov (only D value)
  * Statsample::Test::Levene
  * Statsample::Test::UMannWhitney
  * Statsample::Test::T
  * Statsample::Test::WilcoxonSignedRank
* Module Graph provides several classes to create beautiful graphs using rubyvis
  * Statsample::Graph::Boxplot
  * Statsample::Graph::Histogram
  * Statsample::Graph::Scatterplot
* Gem <tt>bio-statsample-timeseries</tt> provides module Statsample::TimeSeries with support for time series, including ARIMA estimation using Kalman-Filter. 
* Gem <tt>statsample-sem</tt> provides a DSL to R libraries <tt>sem</tt> and <tt>OpenMx</tt>
* Gem <tt>statsample-glm</tt> provides GLM (generalized linear model) methods to work with Logistic, Poisson and Gaussian regression, using ML or IRWLS.
* Close integration with gem <tt>reportbuilder</tt>, to easily create reports on text, html and rtf formats.

# Examples of use:

See the [examples folder](https://github.com/clbustos/statsample/tree/master/examples/) too.

## Boxplot

```ruby
require 'statsample'

ss_analysis(Statsample::Graph::Boxplot) do 
  n=30
  a=rnorm(n-1,50,10)
  b=rnorm(n, 30,5)
  c=rnorm(n,5,1)
  a.push(2)
  boxplot(:vectors=>[a,b,c], :width=>300, :height=>300, :groups=>%w{first first second}, :minimum=>0)
end
Statsample::Analysis.run # Open svg file on *nix application defined
```

## Correlation matrix

```ruby
require 'statsample'
# Note R like generation of random gaussian variable
# and correlation matrix

ss_analysis("Statsample::Bivariate.correlation_matrix") do
  samples=1000
  ds=data_frame(
    'a'=>rnorm(samples), 
    'b'=>rnorm(samples),
    'c'=>rnorm(samples),
    'd'=>rnorm(samples))
  cm=cor(ds) 
  summary(cm)
end

Statsample::Analysis.run_batch # Echo output to console
```

# Requirements

Optional: 

* Plotting: gnuplot and rbgnuplot, SVG::Graph
* Factorial analysis and polychorical correlation(joint estimate and polychoric series): gsl library and rb-gsl (https://rubygems.org/gems/rb-gsl/). You should install it using <tt>gem install rb-gsl</tt>. 

*Note*: Use gsl 1.12.109 or later.

# Resources

* Source code on github :: http://github.com/clbustos/statsample
* Docs :: http://statsample.apsique.cl/
* Bug report and feature request :: http://github.com/clbustos/statsample/issues
* E-mailing list :: http://groups.google.com/group/statsample

# Installation

```bash
$ sudo gem install statsample
```

On *nix, you should install statsample-optimization to retrieve gems gsl, statistics2 and a C extension to speed some methods. 

Precompiled versions are available for Ruby 1.9 on x86, x86_64 and mingw32 archs.

```bash
$ sudo gem install statsample-optimization
```

If you use Ruby 1.8, you should compile statsample-optimization, using the parameter <tt>--platform ruby</tt>

```bash
$ sudo gem install statsample-optimization --platform ruby
```

If you need to work on Structural Equation Modeling, see `statsample-sem`. You need R with the `sem` or [OpenMx](http://openmx.psyc.virginia.edu/) libraries installed

```bash
$ sudo gem install statsample-sem
```

A setup.rb file is also available for installing without RubyGems:

```bash
$ sudo ruby setup.rb
```

## License

BSD-3 (See LICENSE.txt)

The license could change between versions without prior warning. If you want a specific license, just choose the version that you need.


================================================
FILE: Rakefile
================================================
#!/usr/bin/ruby
# -*- ruby -*-
# -*- coding: utf-8 -*-
$:.unshift(File.dirname(__FILE__)+'/lib/')

require 'rubygems'
require 'statsample'
require 'hoe'
require 'rdoc'

Hoe.plugin :git
Hoe.plugin :doofus
# Syntax-checks every Ruby file under lib/ with warnings enabled
# (`ruby -w -c <file>`) and prints the name of each file that fails.
desc "Ruby Lint"
task :lint do
  # The top-level `Config` constant was only a deprecated alias of RbConfig
  # and is gone on Ruby >= 2.2, so the interpreter name is read from RbConfig.
  require 'rbconfig'
  executable = RbConfig::CONFIG['RUBY_INSTALL_NAME']
  Dir.glob("lib/**/*.rb") do |f|
    # Arguments are passed separately so that no shell is involved and file
    # names containing quotes or spaces are handled correctly.
    puts "Error on: #{f}" unless system(executable, '-w', '-c', f)
  end
end

# Extra action attached to the :release task: push master to origin.
task(:release) { system 'git push origin master' }

# Empty task body.
# Only to omit warnings
task("clobber_docs") {}
# Regenerates the translation catalogs from the Ruby/rhtml sources found
# under lib/ and bin/.
desc "Update pot/po files."
task "gettext:updatepo" do
  require 'gettext/tools'
  sources = Dir.glob("{lib,bin}/**/*.{rb,rhtml}")
  app_version = "statsample #{Statsample::VERSION}"
  GetText.update_pofiles("statsample", sources, app_version)
end

# Builds the mo-files with GetText's default arguments.
desc "Create mo-files"
task "gettext:makemo" do
  require 'gettext/tools'
  GetText.create_mofiles
  # GetText.create_mofiles(true, "po", "locale")  # This is for "Ruby on Rails".
end

# Gem specification, built through Hoe. The returned object is kept in +h+
# because the documentation tasks defined later in this Rakefile read
# settings (readme file, spec, rsync arguments) from it.
h=Hoe.spec('statsample') do
  self.version=Statsample::VERSION
  #self.testlib=:minitest
  self.readme_file = 'README.md'
  # Assigned once; the original set the very same URL list twice.
  self.urls = ['https://github.com/clbustos/statsample']
  self.developer('Claudio Bustos', 'clbustos@gmail.com')

  # Runtime dependencies.
  self.extra_deps << ["spreadsheet","~>0.6"] <<
    ["reportbuilder", "~>1.4"] <<
    ["minimization", "~>0.2.0"] <<
    ["fastercsv", ">0"] <<
    ["dirty-memoize", "~>0.0"] <<
    ["extendmatrix","~>0.3.1"] <<
    ["statsample-bivariate-extension", ">0"] <<
    ["rserve-client"] <<
    ["rubyvis"] <<
    ["distribution"]

  # Development-only dependencies.
  self.extra_dev_deps << ["hoe","~>0"] <<
    ["shoulda","~>3"] <<
    ["minitest", "~>2"] <<
    ["gettext", "~>0"] <<
    ["mocha", "~>0"] <<
    ["hoe-git", "~>0"]

  self.clean_globs << "test/images/*" << "demo/item_analysis/*" << "demo/Regression"
  self.post_install_message = <<-EOF
***************************************************
Thanks for installing statsample.

On *nix, you could install statsample-optimization
to retrieve gems gsl, statistics2 and a C extension
to speed some methods.

  $ sudo gem install statsample-optimization

On Ubuntu, install  build-essential and libgsl0-dev 
using apt-get. Compile ruby 1.8 or 1.9 from 
source code first.

  $ sudo apt-get install build-essential libgsl0-dev


*****************************************************
  EOF
  self.need_rdoc=false
end

# Defines the :docs RDoc task, but only when the loaded Rake still provides
# Rake::RDocTask; otherwise nothing is defined.
if Rake.const_defined?(:RDocTask)
  Rake::RDocTask.new(:docs) do |rd|
    rd.main = h.readme_file
    # '-d' is added only when `which dot` finds a dot binary, and the lookup
    # is skipped entirely when NODOT is set or Hoe::WINDOZE is true.
    rd.options << '-d' if (`which dot` =~ /\/dot/) unless
      ENV['NODOT'] || Hoe::WINDOZE
    rd.rdoc_dir = 'doc'

    rd.rdoc_files.include("lib/**/*.rb")
    rd.rdoc_files += h.spec.extra_rdoc_files
    rd.rdoc_files.reject! {|f| f=="Manifest.txt"}

    # Reuse a title option from the gem spec when it has one.
    title = h.spec.rdoc_options.grep(/^(-t|--title)=?$/).first
    if title then
      rd.options << title

      unless title =~ /\=/ then # for ['-t', 'title here']
        # The title text is the element following the option. The spec has to
        # be reached through +h+: there is no local named +spec+ here, so the
        # bare `spec.rdoc_options` would raise NameError.
        title_index = h.spec.rdoc_options.index(title)
        rd.options << h.spec.rdoc_options[title_index + 1]
      end
    else
      # No title in the spec: build a default one from name and version.
      title = "#{h.name}-#{h.version} Documentation"
      title = "#{h.rubyforge_name}'s " + title if h.rubyforge_name != h.name
      rd.options << '--title' << title
    end
  end

end

# Uploads the generated rdoc directory with rsync. Connection settings are
# read from ./doc.yaml, which must provide the keys "user", "host" and "dir".
desc 'Publish rdocs with analytics support'
task :publicar_docs => [:clean] do
#  ruby %{agregar_adsense_a_doc.rb}
  # YAML is not required anywhere else in this Rakefile, so load it here
  # instead of relying on another library having pulled it in.
  require 'yaml'
  path = File.expand_path("./doc.yaml")
  # NOTE(review): YAML.load is kept because doc.yaml is a local, developer
  # owned file; do not point this at untrusted input.
  config = YAML.load(File.read(path))
  host = "#{config["user"]}@#{config["host"]}"

  remote_dir = config["dir"]
  local_dir = h.local_rdoc_dir
  Dir.glob(local_dir+"/**/*") do |file|
    # Separate arguments keep paths containing spaces or shell
    # metacharacters from being split or interpreted by a shell.
    sh 'chmod', '755', file
  end
  sh %{rsync #{h.rsync_args} #{local_dir}/ #{host}:#{remote_dir}}
end

# vim: syntax=Ruby


================================================
FILE: benchmarks/correlation_matrix_15_variables.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))

# Benchmark description and measurements, written with the BenchPress DSL.
extend BenchPress

cases = 250
# NOTE(review): the file is named "15_variables" but 20 variables are
# generated here — confirm which one is intended.
vars = 20

name "gsl matrix based vs. manual ruby correlation matrix (#{vars} vars, #{cases} cases)"
author 'Clbustos'
date '2011-01-18'
summary "\nA correlation matrix could be constructed using matrix algebra or\n" \
        "mannualy, calculating covariances, means and sd for each pair of vectors.\n" \
        "In this test, we test the calculation using #{vars} variables with \n" \
        "#{cases} cases on each vector\n"

reps 200 # number of repetitions

# One vector per variable ("x0", "x1", ...), filled from rand.
vectors = vars.times.inject({}) do |acc, i|
  acc["x#{i}"] = Statsample::Vector.new_scale(cases) { rand }
  acc
end
ds = vectors.to_dataset

measure("Statsample::Bivariate.correlation_matrix_optimized") { Statsample::Bivariate.correlation_matrix_optimized(ds) }

measure("Statsample::Bivariate.correlation_matrix_pairwise") { Statsample::Bivariate.correlation_matrix_pairwise(ds) }


================================================
FILE: benchmarks/correlation_matrix_5_variables.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))

# Benchmark description and measurements, written with the BenchPress DSL.
extend BenchPress

cases = 500
vars = 5

name "gsl matrix based vs. manual ruby correlation matrix (#{vars} vars, #{cases} cases)"
author 'Clbustos'
date '2011-01-18'
summary "\nA correlation matrix could be constructed using matrix algebra or\n" \
        "mannualy, calculating covariances, means and sd for each pair of vectors.\n" \
        "In this test, we test the calculation using #{vars} variables with \n" \
        "#{cases} cases on each vector\n"

reps 200 # number of repetitions

# One vector per variable ("x0", "x1", ...), filled from rand.
vectors = vars.times.inject({}) do |acc, i|
  acc["x#{i}"] = Statsample::Vector.new_scale(cases) { rand }
  acc
end
ds = vectors.to_dataset

measure("Statsample::Bivariate.correlation_matrix_optimized") { Statsample::Bivariate.correlation_matrix_optimized(ds) }

measure("Statsample::Bivariate.correlation_matrix_pairwise") { Statsample::Bivariate.correlation_matrix_pairwise(ds) }


================================================
FILE: benchmarks/correlation_matrix_methods/correlation_matrix.html
================================================
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" >
<title>Correlation matrix analysis</title>
        <style>
        body {
          margin:0;
          padding:1em;
        }
        table {
          border-collapse: collapse;

        }
        table td {
          border: 1px solid black;
        }
        .section {
          margin:0.5em;
        }
        </style>

</head><body>
<h1>Correlation matrix analysis</h1><div id='toc'><div class='title'>List of contents</div>
<ul>
<li><a href='#toc_1'>Multiple reggresion of cases,vars,c_v on time_optimized</a></li>
<ul>
<li><a href='#toc_2'>ANOVA</a></li>
</ul>
<li><a href='#toc_3'>Multiple reggresion of cases,vars,c_v on time_pairwise</a></li>
<ul>
<li><a href='#toc_4'>ANOVA</a></li>
</ul>
</ul>
</div>
<div class='tot'><div class='title'>List of tables</div><ul><li><a href='#table_1'>ANOVA Table</a></li><li><a href='#table_2'>Beta coefficients</a></li><li><a href='#table_3'>ANOVA Table</a></li><li><a href='#table_4'>Beta coefficients</a></li></ul></div>
  <div class='section'><h2>Multiple reggresion of cases,vars,c_v on time_optimized</h2><a name='toc_1'></a>
    <p>Engine: Statsample::Regression::Multiple::RubyEngine</p>
    <p>Cases(listwise)=63(63)</p>
    <p>R=0.978844</p>
    <p>R^2=0.958137</p>
    <p>R^2 Adj=0.956008</p>
    <p>Std.Error R=3.092024</p>
    <p>Equation=4.031667 + 0.018039cases + 0.244790vars + 0.001197c_v</p>
    <div class='section'><h3>ANOVA</h3><a name='toc_2'></a>
      <a name='table_1'></a><table><caption>ANOVA Table</caption><thead><th>source</th><th>ss</th><th>df</th><th>ms</th><th>f</th><th>p</th></thead>
<tbody>
<tr><td>Regression</td><td>12910.098</td><td>3</td><td>4303.366</td><td>450.114</td><td>0.000</td></tr>
<tr><td>Error</td><td>564.076</td><td>59</td><td>9.561</td><td></td><td></td></tr>
<tr><td>Total</td><td>13474.174</td><td>62</td><td>4312.927</td><td></td><td></td></tr>
</tbody>
</table>

    </div>
    <a name='table_2'></a><table><caption>Beta coefficients</caption><thead><th>coeff</th><th>b</th><th>beta</th><th>se</th><th>t</th></thead>
<tbody>
<tr><td>Constant</td><td>4.031667</td><td>-</td><td>0.752604</td><td>5.356953</td></tr>
<tr><td>cases</td><td>0.018039</td><td>0.381587</td><td>0.001961</td><td>9.200093</td></tr>
<tr><td>vars</td><td>0.244790</td><td>0.224390</td><td>0.036055</td><td>6.789335</td></tr>
<tr><td>c_v</td><td>0.001197</td><td>0.584174</td><td>0.000094</td><td>12.738410</td></tr>
</tbody>
</table>

  </div>
  <div class='section'><h2>Multiple reggresion of cases,vars,c_v on time_pairwise</h2><a name='toc_3'></a>
    <p>Engine: Statsample::Regression::Multiple::RubyEngine</p>
    <p>Cases(listwise)=63(63)</p>
    <p>R=0.999637</p>
    <p>R^2=0.999275</p>
    <p>R^2 Adj=0.999238</p>
    <p>Std.Error R=0.538365</p>
    <p>Equation=-0.520303 + -0.000708cases + 1.234451vars + 0.000735c_v</p>
    <div class='section'><h3>ANOVA</h3><a name='toc_4'></a>
      <a name='table_3'></a><table><caption>ANOVA Table</caption><thead><th>source</th><th>ss</th><th>df</th><th>ms</th><th>f</th><th>p</th></thead>
<tbody>
<tr><td>Regression</td><td>23554.271</td><td>3</td><td>7851.424</td><td>27089.134</td><td>0.000</td></tr>
<tr><td>Error</td><td>17.100</td><td>59</td><td>0.290</td><td></td><td></td></tr>
<tr><td>Total</td><td>23571.372</td><td>62</td><td>7851.714</td><td></td><td></td></tr>
</tbody>
</table>

    </div>
    <a name='table_4'></a><table><caption>Beta coefficients</caption><thead><th>coeff</th><th>b</th><th>beta</th><th>se</th><th>t</th></thead>
<tbody>
<tr><td>Constant</td><td>-0.520303</td><td>-</td><td>0.131039</td><td>-3.970594</td></tr>
<tr><td>cases</td><td>-0.000708</td><td>-0.011324</td><td>0.000341</td><td>-2.074007</td></tr>
<tr><td>vars</td><td>1.234451</td><td>0.855546</td><td>0.006278</td><td>196.641087</td></tr>
<tr><td>c_v</td><td>0.000735</td><td>0.271138</td><td>0.000016</td><td>44.912972</td></tr>
</tbody>
</table>

  </div>
</body></html>

================================================
FILE: benchmarks/correlation_matrix_methods/correlation_matrix.rb
================================================
# This test create a database to adjust the best algorithm
# to use on correlation matrix
require(File.expand_path(File.dirname(__FILE__)+'/../helpers_benchmark.rb'))
require 'statsample'
require 'benchmark'

# Builds a dataset with +vars+ fields named "x0", "x1", ... Each field is
# created with Statsample::Vector.new_scale(cases) and a block that draws its
# values from the generator returned by Distribution::Normal.rng.
def create_dataset(vars, cases)
  generator = Distribution::Normal.rng
  vectors = {}
  vars.times do |index|
    vectors["x#{index}"] = Statsample::Vector.new_scale(cases) { generator.call }
  end
  vectors.to_dataset
end

# Returns Statsample::Bivariate.prediction_pairwise(vars, cases) divided by 10.
def prediction_pairwise(vars, cases)
  predicted = Statsample::Bivariate.prediction_pairwise(vars, cases)
  predicted / 10
end
# Returns Statsample::Bivariate.prediction_optimized(vars, cases) divided by 10.
def prediction_optimized(vars, cases)
  predicted = Statsample::Bivariate.prediction_optimized(vars, cases)
  predicted / 10
end


# Rebuild the timing dataset when the saved copy is missing or older than this
# script; otherwise load the saved copy.
# File.exist? replaces File.exists?, which was deprecated and then removed in
# Ruby 3.2.
if !File.exist?("correlation_matrix.ds") || File.mtime(__FILE__) > File.mtime("correlation_matrix.ds")
  reps=100 #number of repetitions
  ds_sizes=[5,10,30,50,100,150,200,500,1000]
  ds_vars=[3,4,5,10,20,30,40]
  #ds_sizes=[5,10]
  #ds_vars=[3,5,20]
  rs=Statsample::Dataset.new(%w{cases vars time_optimized time_pairwise})

  ds_sizes.each do |cases|
    ds_vars.each do |vars|
      ds=create_dataset(vars,cases)

      # Time +reps+ runs of each algorithm on the same dataset.
      time_optimized= Benchmark.realtime do
        reps.times {
          Statsample::Bivariate.correlation_matrix_optimized(ds)
          ds.clear_gsl
        }
      end

      time_pairwise= Benchmark.realtime do
        reps.times {
          Statsample::Bivariate.correlation_matrix_pairwise(ds)
        }
      end

      # Measured time followed, in parentheses, by the library's prediction.
      puts "Cases:#{cases}, vars:#{vars} -> opt:%0.3f (%0.3f) | pair: %0.3f (%0.3f)" % [time_optimized, prediction_optimized(vars,cases), time_pairwise, prediction_pairwise(vars,cases)]

      # Times are stored as the square root of the elapsed milliseconds.
      rs.add_case({'cases'=>cases,'vars'=>vars,'time_optimized'=>Math.sqrt(time_optimized*1000),'time_pairwise'=>Math.sqrt(time_pairwise*1000)})
    end
  end

else
  rs=Statsample.load("correlation_matrix.ds")
end


rs.fields.each {|f| rs[f].type=:scale}

# Interaction term: cases * vars.
rs['c_v']=rs.collect {|row| row['cases']*row['vars']}

rs.update_valid_data
rs.save("correlation_matrix.ds")
Statsample::Excel.write(rs,"correlation_matrix.xls")


# Regress each timing on cases, vars and their product, and save the report.
rb=ReportBuilder.new(:name=>"Correlation matrix analysis")

rb.add(Statsample::Regression.multiple(rs[['cases','vars','time_optimized','c_v']],'time_optimized', :digits=>6))
rb.add(Statsample::Regression.multiple(rs[['cases','vars','time_pairwise','c_v']],'time_pairwise', :digits=>6))


rb.save_html("correlation_matrix.html")


================================================
FILE: benchmarks/factor_map.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))

# Benchmark of Statsample::Factor::MAP#compute with use_gsl switched off and
# on, written with the BenchPress DSL.
extend BenchPress

name "Statsample::Factor::Map with and without GSL"
author 'Clbustos'
date '2011-01-18'
summary "Velicer's MAP uses a lot of Matrix algebra. How much we can improve the timing using GSL?\n"

reps 20 # number of repetitions

# 8x8 input matrix handed to the MAP test.
correlations = Matrix[
  [1,     0.846, 0.805, 0.859, 0.473, 0.398, 0.301, 0.382],
  [0.846, 1,     0.881, 0.826, 0.376, 0.326, 0.277, 0.415],
  [0.805, 0.881, 1,     0.801, 0.38,  0.319, 0.237, 0.345],
  [0.859, 0.826, 0.801, 1,     0.436, 0.329, 0.327, 0.365],
  [0.473, 0.376, 0.38,  0.436, 1,     0.762, 0.73,  0.629],
  [0.398, 0.326, 0.319, 0.329, 0.762, 1,     0.583, 0.577],
  [0.301, 0.277, 0.237, 0.327, 0.73,  0.583, 1,     0.539],
  [0.382, 0.415, 0.345, 0.365, 0.629, 0.577, 0.539, 1]
]

map_test = Statsample::Factor::MAP.new(correlations)

measure "Statsample::Factor::MAP without GSL" do
  map_test.use_gsl = false
  map_test.compute
end

measure "Statsample::Factor::MAP with GSL" do
  map_test.use_gsl = true
  map_test.compute
end


================================================
FILE: benchmarks/helpers_benchmark.rb
================================================
$:.unshift(File.expand_path(File.dirname(__FILE__)+'/../lib/'))
$:.unshift(File.expand_path(File.dirname(__FILE__)+'/'))

require 'statsample'
require 'bench_press'

================================================
FILE: doc_latex/manual/equations.tex
================================================
\part{Equations}
\section{Convention}
\begin{align*}
n &= \text{sample size}\\
N &= \text{population size}\\
p &= \text{proportion inside a sample}\\
P &= \text{proportion inside a population}
\end{align*}
\section{Ruby::Regression::Multiple}

To compute the standard error of coefficients, you obtain the estimated variance-covariance matrix of error.

Let $\mathbf{X}$ be the matrix of predictor data, including a constant column; $\mathbf{MSE}$ the mean square error; $SSE$ the sum of squares of errors; $n$ the number of cases; and $p$ the number of predictors.

\begin{equation}
\mathbf{MSE}=\frac{SSE}{n-p-1}
\end{equation}

\begin{equation}
\mathbf{E}=(\mathbf{X'}\mathbf{X})^{-1}\mathbf{MSE}
\end{equation}

The square roots of the diagonal elements of $\mathbf{E}$ are the standard errors of the coefficients.


\section{Ruby::SRS}
Finite population correction (FPC) is used in the standard error calculation for populations below 10,000. Function
\begin{verbatim}
fpc_var(sam,pop)
\end{verbatim}
calculate FPC for variance with
\begin{equation}
fpc_{var} = \frac{N-n} {N-1}
\end{equation}

with n  as sam and N as pop

Function 
\begin{verbatim}
fpc = fpc(sam,pop)
\end{verbatim}

calculate FPC for standard deviation with 
\begin{equation}
fpc_{sd} = \sqrt{\frac{N-n} {N-1}}
\label{fpc}
\end{equation}
with n  as sample size and N as population size.

\subsection{Sample Size estimation for proportions}

On infinite populations, you should use the method
\begin{verbatim}
estimation_n0(d,prop,margin=0.95)
\end{verbatim}
which uses
\begin{equation}
n = \frac{t^2(pq)}{d^2}
\label{n_i}
\end{equation}
where
\begin{align*}
t &= \text{t value for given level of confidence ( 1.96 for 95\% )}\\
d &= \text{margin of error}
\end{align*}

On finite populations, you should use
\begin{verbatim}
estimation_n(d,prop,n_pobl, margin=0.95)
\end{verbatim}
which uses
\begin{equation}
n = \frac{n_i}{1+(\frac{n_i-1}{N})}
\end{equation}

where $n_i$ is the $n$ given by equation \ref{n_i} and $N$ is the population size.


================================================
FILE: examples/boxplot.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')
require 'statsample'
# Boxplot example: three rnorm samples plotted in two groups. The first
# sample is generated one case short and then gets the value 2 appended.
Statsample::Analysis.store(Statsample::Graph::Boxplot) do
  size = 30
  first_a  = rnorm(size - 1, 50, 10)
  first_b  = rnorm(size, 30, 5)
  second_c = rnorm(size, 5, 1)
  first_a.push(2)
  boxplot(
    :vectors => [first_a, first_b, second_c],
    :width   => 300,
    :height  => 300,
    :groups  => ['first', 'first', 'second'],
    :minimum => 0
  )
end

# Run the stored analysis only when this file is executed directly.
Statsample::Analysis.run if __FILE__ == $0


================================================
FILE: examples/correlation_matrix.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')
require 'statsample'

# Correlation matrix example: four rnorm variables of 1000 cases each are put
# in a data frame and the summary of cor(ds) is reported.
Statsample::Analysis.store("Statsample::Bivariate.correlation_matrix") do
  samples = 1000
  ds = data_frame('a' => rnorm(samples), 'b' => rnorm(samples), 'c' => rnorm(samples), 'd' => rnorm(samples))
  summary(cor(ds))
end

# Run the stored analysis only when this file is executed directly.
Statsample::Analysis.run_batch if __FILE__ == $0


================================================
FILE: examples/dataset.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')
require 'statsample'

# Dataset example: two vectors whose generator block yields rand(5), with the
# value 4 replaced by nil, are combined into a dataset and summarised.
Statsample::Analysis.store(Statsample::Dataset) do
  samples = 1000
  a = Statsample::Vector.new_scale(samples) do
    value = rand(5)
    value == 4 ? nil : value
  end
  b = Statsample::Vector.new_scale(samples) do
    value = rand(5)
    value == 4 ? nil : value
  end

  summary({ 'a' => a, 'b' => b }.to_dataset)
end

# Run the stored analysis only when this file is executed directly.
Statsample::Analysis.run_batch if __FILE__ == $0


================================================
FILE: examples/dominance_analysis.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')

require 'statsample'


# Dominance analysis example: 'y' is built as a weighted sum of four rnorm
# predictors plus noise, then the correlation matrix, a regression on 'y' and
# two dominance analyses (single predictors, then grouped predictors) are
# summarised.
Statsample::Analysis.store(Statsample::DominanceAnalysis) do
  sample=300
  a=rnorm(sample)
  b=rnorm(sample)
  c=rnorm(sample)
  d=rnorm(sample)

  ds={'a'=>a,'b'=>b,'cc'=>c,'d'=>d}.to_dataset
  attach(ds)
  # The noise vector is sized with +sample+ instead of a repeated literal, so
  # it keeps the same number of cases as the predictors if +sample+ changes.
  ds['y']=a*5+b*3+cc*2+d+rnorm(sample)
  cm=cor(ds)
  summary(cm)
  # Named +regression+ so the local does not shadow the +lr+ helper it calls.
  regression=lr(ds,'y')
  summary(regression)
  da=dominance_analysis(ds,'y')
  summary(da)

  # Same analysis with 'cc' and 'd' passed together as one group.
  da=dominance_analysis(ds,'y',:name=>"Dominance Analysis using group of predictors", :predictors=>['a', 'b', %w{cc d}])
  summary(da)
end


# Run the stored analysis only when this file is executed directly.
if __FILE__==$0
  Statsample::Analysis.run_batch
end


================================================
FILE: examples/dominance_analysis_bootstrap.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')
require 'statsample'

# Bootstrap dominance analysis example: first with two dependent variables
# ('y1' and 'y2'), then with 'y1' alone on the field range 'a'..'y1'.
Statsample::Analysis.store(Statsample::DominanceAnalysis::Bootstrap) do
  sample = 300
  a = rnorm(sample)
  b = rnorm(sample)
  c = rnorm(sample)
  d = rnorm(sample)
  a.name = "a"
  b.name = "b"
  c.name = "c"
  d.name = "d"

  ds = { 'a' => a, 'b' => b, 'cc' => c, 'd' => d }.to_dataset
  attach(ds)
  ds['y1'] = a * 5 + b * 2 + cc * 2 + d * 2 + rnorm(sample, 0, 10)
  ds['y2'] = a * 10 + rnorm(sample)

  # Multivariate dependent: both y1 and y2.
  dab = dominance_analysis_bootstrap(ds, ['y1', 'y2'], :debug => true)
  dab.bootstrap(100, nil)
  summary(dab)

  # Single dependent on the sub-dataset.
  ds2 = ds['a'..'y1']
  dab2 = dominance_analysis_bootstrap(ds2, 'y1', :debug => true)
  dab2.bootstrap(100, nil)
  summary(dab2)
end

# Run the stored analysis only when this file is executed directly.
Statsample::Analysis.run_batch if __FILE__ == $0


================================================
FILE: examples/histogram.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')
require 'statsample'

# Histogram example: plots the vector returned by rnorm(3000, 0, 20).
Statsample::Analysis.store(Statsample::Graph::Histogram) do
  data = rnorm(3000, 0, 20)
  histogram(data)
end

# Run the stored analysis only when this file is executed directly.
Statsample::Analysis.run if __FILE__ == $0


================================================
FILE: examples/icc.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')

require 'statsample'

# ICC example: vector 'a' holds rand(10) values and 'b', 'c', 'd' are recodes
# of it with rand(4)-2 added. The ICC summary is printed with the object as
# created and again after setting the type to :icc_3_1 and :icc_a_k.
Statsample::Analysis.store(Statsample::Reliability::ICC) do
  size = 1000
  a = Statsample::Vector.new_scale(size) { rand(10) }
  b = a.recode { |value| value + rand(4) - 2 }
  c = a.recode { |value| value + rand(4) - 2 }
  d = a.recode { |value| value + rand(4) - 2 }
  @ds = { 'a' => a, 'b' => b, 'c' => c, 'd' => d }.to_dataset
  @icc = Statsample::Reliability::ICC.new(@ds)
  summary(@icc)
  [:icc_3_1, :icc_a_k].each do |icc_type|
    @icc.type = icc_type
    summary(@icc)
  end
end

# Run the stored analysis only when this file is executed directly.
Statsample::Analysis.run_batch if __FILE__ == $0


================================================
FILE: examples/levene.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')

require 'statsample'

# Levene test on two fixed samples of ten values each.
Statsample::Analysis.store(Statsample::Test::Levene) do
  first_group  = [1, 2, 3, 4, 5, 6, 7, 8, 100, 10].to_scale
  # 30, 40, ..., 120
  second_group = (30..120).step(10).to_a.to_scale

  summary(levene([first_group, second_group]))
end

# Only run the stored analyses when executed as a script.
Statsample::Analysis.run_batch if __FILE__ == $0


================================================
FILE: examples/multiple_regression.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')

require 'statsample'

# Multiple regression on simulated data: 'y' is a weighted sum of four rnorm
# predictors plus rnorm noise.
Statsample::Analysis.store(Statsample::Regression::Multiple) do
  samples = 2000

  predictors = {
    'a'  => rnorm(samples),
    'b'  => rnorm(samples),
    'cc' => rnorm(samples),
    'd'  => rnorm(samples)
  }
  ds = dataset(predictors)

  # After attach, a, b, cc and d are looked up as fields of +ds+.
  attach(ds)
  ds['y'] = a*5+b*3+cc*2+d+rnorm(samples)

  summary(lr(ds, 'y'))
end

# Only run the stored analyses when executed as a script.
Statsample::Analysis.run_batch if __FILE__ == $0


================================================
FILE: examples/multivariate_correlation.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')

require 'statsample'
require 'mathn'


# Regression with several dependent variables, computed from a fixed 9x9
# matrix whose fields are named below.
Statsample::Analysis.store(Statsample::Regression::Multiple::MultipleDependent) do
  correlations = Matrix[
    [ 1,     0.53,  0.62,  0.19, -0.09,  0.08,  0.02, -0.12,  0.08],
    [ 0.53,  1,     0.61,  0.23,  0.1,   0.18,  0.02, -0.1,   0.15],
    [ 0.62,  0.61,  1,     0.03,  0.1,   0.12,  0.03, -0.06,  0.12],
    [ 0.19,  0.23,  0.03,  1,    -0.02,  0.02,  0,    -0.02, -0.02],
    [-0.09,  0.1,   0.1,  -0.02,  1,     0.05,  0.06,  0.18,  0.02],
    [ 0.08,  0.18,  0.12,  0.02,  0.05,  1,     0.22, -0.07,  0.36],
    [ 0.02,  0.02,  0.03,  0,     0.06,  0.22,  1,    -0.01, -0.05],
    [-0.12, -0.1,  -0.06, -0.02,  0.18, -0.07, -0.01,  1,    -0.03],
    [ 0.08,  0.15,  0.12, -0.02,  0.02,  0.36, -0.05, -0.03,  1]
  ]

  correlations.extend Statsample::CovariateMatrix
  correlations.fields = %w{adhd cd odd sex age monly mwork mage poverty}

  # adhd, cd and odd are passed as the second argument of MultipleDependent.
  regression = Statsample::Regression::Multiple::MultipleDependent.new(correlations, %w{adhd cd odd})

  echo "R^2_yx #{regression.r2yx}"
  echo "P^2_yx #{regression.p2yx}"
end

# Only run the stored analyses when executed as a script.
Statsample::Analysis.run_batch if __FILE__ == $0


================================================
FILE: examples/parallel_analysis.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')

require 'statsample'
# Simulation settings shared with the analysis block below.
samples = 150
variables = 30
iterations = 50

# Parallel analysis versus the Kaiser criterion on variables generated from
# three random factors.
Statsample::Analysis.store(Statsample::Factor::ParallelAnalysis) do
  rng = Distribution::Normal.rng()
  f1 = rnorm(samples)
  f2 = rnorm(samples)
  f3 = rnorm(samples)

  vectors = {}
  variables.times do |i|
    key = "v#{i}"
    # The factor weights change with the variable index +i+.
    values = samples.times.map do |nv|
      f1[nv]*i+(f2[nv]*(15-i))+((f3[nv]*(30-i))*1.5)*rng.call
    end
    vectors[key] = values.to_scale
    vectors[key].name = "Vector #{i}"
  end

  ds = vectors.to_dataset

  pa = Statsample::Factor::ParallelAnalysis.new(ds, :iterations => iterations, :debug => true)
  components = pca(cor(ds))

  echo "There are 3 real factors on data"
  summary components
  echo "Traditional Kaiser criterion (k>1) returns #{components.m} factors"
  summary pa
  echo "Parallel Analysis returns #{pa.number_of_factors} factors to preserve"
end

# Only run the stored analyses when executed as a script.
Statsample::Analysis.run_batch if __FILE__ == $0


================================================
FILE: examples/polychoric.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')

# The original example also pushed a developer-specific absolute directory
# (a local statsample-bivariate-extension checkout) onto the load path. That
# path only exists on one machine, so it is no longer added here.
# NOTE(review): +polychoric+ presumably needs statsample-bivariate-extension
# to be installed (e.g. as a gem) -- confirm.
require 'statsample'

# Polychoric correlation of a random 3x3 contingency table, estimated with
# three different methods.
Statsample::Analysis.store(Statsample::Bivariate::Polychoric) do
  ct=Matrix[[rand(10)+50, rand(10)+50,  rand(10)+1],
            [rand(20)+5,  rand(50)+4,   rand(10)+1],
            [rand(8)+1,   rand(12)+1,   rand(10)+1]]

  # Estimation of polychoric correlation using two-step (default)
  poly=polychoric(ct, :name=>"Polychoric with two-step", :debug=>false)
  summary poly

  # Estimation of polychoric correlation using joint method (slow)
  poly=polychoric(ct, :method=>:joint, :name=>"Polychoric with joint")
  summary poly

  # Uses polychoric series (not recommended)
  poly=polychoric(ct, :method=>:polychoric_series, :name=>"Polychoric with polychoric series")
  summary poly
end

# Only run the stored analyses when executed as a script.
if __FILE__==$0
  Statsample::Analysis.run_batch
end


================================================
FILE: examples/principal_axis.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')

require 'statsample'

# Principal axis factoring of a fixed 4x4 matrix, extracting one factor.
Statsample::Analysis.store(Statsample::Factor::PrincipalAxis) do
  matrix = Matrix[
    [1.0,               0.709501601093587, 0.877596585880047,  0.272219316266807],
    [0.709501601093587, 1.0,               0.291633797330304,  0.871141831433844],
    [0.877596585880047, 0.291633797330304, 1.0,               -0.213373722977167],
    [0.272219316266807, 0.871141831433844, -0.213373722977167, 1.0]
  ]
  matrix.extend Statsample::CovariateMatrix

  # Field names are left unset (the assignment was already disabled):
  #matrix.fields=%w{a b c d}
  fa = principal_axis(matrix, :m => 1, :smc => false)

  summary fa
end

# Only run the stored analyses when executed as a script.
Statsample::Analysis.run_batch if __FILE__ == $0


================================================
FILE: examples/reliability.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib')
require 'statsample'

# Reliability example: twenty items built from one common vector plus
# rnorm(samples,0,0.2) noise, analysed as one scale and then as two scales.
Statsample::Analysis.store(Statsample::Reliability) do
  samples = 100
  a = rnorm(samples)

  ds = Statsample::Dataset.new
  (0...20).each do |i|
    ds["v#{i}"] = a + rnorm(samples, 0, 0.2)
  end
  ds.update_valid_data

  rel = Statsample::Reliability::ScaleAnalysis.new(ds)
  summary rel

  # Scale 1 uses items v1..v10, scale 2 uses items v11..v19.
  first_items  = (1..10).map { |i| "v#{i}" }
  second_items = (11..19).map { |i| "v#{i}" }

  ms = Statsample::Reliability::MultiScaleAnalysis.new(:name => "Multi Scale analyss") do |m|
    m.scale "Scale 1", ds.clone(first_items)
    m.scale "Scale 2", ds.clone(second_items)
  end

  summary ms
end

# Only run the stored analyses when executed as a script.
Statsample::Analysis.run_batch if __FILE__ == $0


================================================
FILE: examples/scatterplot.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')

# The original example also pushed a developer-specific absolute directory
# (a local reportbuilder checkout) onto the load path. That path only exists
# on one machine, so it is no longer added here.
require 'benchmark'
require 'statsample'
n=100

# Scatterplot of x against y, where y is x plus rnorm(n,0.5,0.2).
Statsample::Analysis.store(Statsample::Graph::Scatterplot) do
  x=rnorm(n)
  y=x+rnorm(n,0.5,0.2)
  scatterplot(x,y)
end

# Only run the stored analyses when executed as a script.
if __FILE__==$0
  Statsample::Analysis.run
end


================================================
FILE: examples/t_test.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib')
require 'statsample'

# One-sample and independent two-sample t tests on small rnorm samples.
Statsample::Analysis.store(Statsample::Test::T) do
  a = rnorm(10)

  # One-sample test; the option hash passes :u=>50.
  one_sample = Statsample::Test.t_one_sample(a, {:u => 50})
  summary one_sample

  b = rnorm(10, 2)

  two_samples = Statsample::Test.t_two_samples_independent(a, b)
  summary two_samples
end

# Only run the stored analyses when executed as a script.
Statsample::Analysis.run_batch if __FILE__ == $0


================================================
FILE: examples/tetrachoric.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')

require 'statsample'

# Tetrachoric correlation for four fixed counts.
Statsample::Analysis.store(Statsample::Bivariate::Tetrachoric) do
  a, b, c, d = 40, 10, 20, 30

  summary tetrachoric(a, b, c, d)
end

# Only run the stored analyses when executed as a script.
Statsample::Analysis.run_batch if __FILE__ == $0


================================================
FILE: examples/u_test.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib')
require 'statsample'

# Mann-Whitney U test on two random samples of sizes 10 and 20.
Statsample::Analysis.store(Statsample::Test::UMannWhitney) do
  a = Array.new(10) { rand(100) }.to_scale
  b = Array.new(20) { (rand(20))**2 + 50 }.to_scale

  u = Statsample::Test::UMannWhitney.new(a, b)
  summary u
end

# Only run the stored analyses when executed as a script.
Statsample::Analysis.run_batch if __FILE__ == $0


================================================
FILE: examples/vector.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')

require 'statsample'

# Summaries of two vectors: one with random values and nils, one built with
# the +c+ helper.
Statsample::Analysis.store(Statsample::Vector) do
  # rand(5) yields 0..4; a 4 is stored as nil.
  a = Statsample::Vector.new_scale(1000) do
    value = rand(5)
    value == 4 ? nil : value
  end
  summary a

  b = c(1, 2, 3, 4, 6..10)
  summary b
end

# Only run the stored analyses when executed as a script.
Statsample::Analysis.run_batch if __FILE__ == $0


================================================
FILE: examples/velicer_map_test.rb
================================================
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')

require 'statsample'

# Velicer's MAP test versus the Kaiser criterion on ten variables generated
# from two random factors plus noise.
Statsample::Analysis.store(Statsample::Factor::MAP) do
  rng = Distribution::Normal.rng
  samples = 100
  variables = 10

  f1 = rnorm(samples)
  f2 = rnorm(samples)

  vectors = {}
  variables.times do |i|
    # Variables 0-4 weight the factors 5 and 2; variables 5-9 weight them 2 and 3.
    w1, w2 = i < 5 ? [5, 2] : [2, 3]
    values = samples.times.map { |nv| f1[nv]*w1 + f2[nv]*w2 + rng.call }
    vectors["v#{i}"] = values.to_scale
  end

  ds = vectors.to_dataset
  correlations = cor(ds)
  components = pca(correlations)

  map_test = Statsample::Factor::MAP.new(correlations)

  echo("There are 2 real factors on data")
  summary(components)
  echo("Traditional Kaiser criterion (k>1) returns #{components.m} factors")
  summary(map_test)
  echo("Velicer's MAP Test returns #{map_test.number_of_factors} factors to preserve")
end

# Only run the stored analyses when executed as a script.
Statsample::Analysis.run_batch if __FILE__ == $0


================================================
FILE: grab_references.rb
================================================
#!/usr/bin/env ruby1.9
require 'reportbuilder'
# Collect the bullet lines ("* ...") that directly follow a "== Reference"
# heading in every Ruby file below the current directory, then write the
# unique, sorted list to references.txt.
refs=[]
Dir.glob("**/*.rb") do |f|
  # Skip any path containing "pkg"
  next if f=~/pkg/
  reference=false
  # File.foreach closes the file when iteration ends; the previous
  # File.open(f).each_line left every handle open until garbage collection.
  File.foreach(f) do |l|
    if l=~/== Reference/
      reference=true
    elsif reference
      if l=~/\*\s+(.+)/
        refs.push $1
      else
        # First non-bullet line ends the reference list
        reference=false
      end
    end
  end
end


rb=ReportBuilder.new(:name=>"References") do |g|
  refs.uniq.sort.each do |r|
    g.text "* #{r}"
  end
end

rb.save_text("references.txt")

================================================
FILE: lib/spss.rb
================================================
# = spss.rb - 
#
# Provides utilities for working with SPSS files
#
# Copyright (C) 2009 Claudio Bustos
#
# Claudio Bustos mailto:clbustos@gmail.com

module SPSS # :nodoc: all
  module Dictionary
    # Base class for the dictionary pieces. Keeps the configuration hash it
    # was built with and an ordered list of child elements.
    class Element
      # config:: hash stored as-is in @config
      def initialize(config={})
        @config=config
        @elements=[]
      end
      # Append a child element
      def add(a)
        @elements.push(a)
      end
      # Call +func+ on every child, indent each result by three spaces and
      # join the results with newlines.
      def parse_elements(func=:to_s)
        @elements.collect{|e| "   "+e.send(func)}.join("\n")
      end
      # Assign every key of +config+ for which this object has a writer
      # method; other keys are ignored.
      #
      # The previous check, <tt>methods.include? key.to_s</tt>, compared a
      # String against the Symbols returned by +methods+ on Ruby >= 1.9, so
      # nothing was ever assigned.
      def init_with(config)
        config.each do |key,value|
          setter="#{key}="
          send(setter,value) if respond_to?(setter)
        end
      end
    end
    # Top-level dictionary element. Renders itself and its children as XML or
    # as SPSS syntax.
    class Dictionary < Element
      attr_accessor :locale, :date_time, :row_count
      # Applies the built-in defaults first and then +config+ on top of them.
      def initialize(config={})
        super
        defaults={
          :locale=>"en_US",
          :date_time=>Time.new().strftime("%Y-%m-%dT%H:%M:%S"),
          :row_count=>1
        }
        init_with(defaults)
        init_with(config)
      end

      # <dictionary> tag wrapping the XML of every child element
      def to_xml
        opening="<dictionary locale='#{@locale}' creationDateTime='#{@date_time}' rowCount='#{@row_count}' xmlns='http://xml.spss.com/spss/data'>"
        "#{opening}\n#{parse_elements(:to_xml)}\n</dictionary>"
      end

      # SPSS syntax of every child element
      def to_spss
        parse_elements(:to_spss)
      end
    end
    
    # A missing-value declaration for a variable.
    class MissingValue < Element
      attr_accessor :data, :type, :from, :to
      # data:: value stored in the +data+ attribute
      # type:: nil, "lowerBound" or "upperBound"
      #
      # Raises ArgumentError for any other +type+. (It used to raise a bare
      # Exception; ArgumentError is a subclass, so <tt>rescue Exception</tt>
      # callers still catch it.)
      def initialize(data,type=nil)
        # Initialise @config/@elements so the inherited add/parse_elements
        # work on this object too.
        super()
        unless type.nil? or type=="lowerBound" or type=="upperBound"
          raise ArgumentError,"Incorrect value for type"
        end
        @data=data
        @type=type
      end
      # <missingValue> tag; the type attribute is omitted when type is nil
      def to_xml
        "<missingValue data='#{@data}' "+(type.nil? ? "":"type='#{type}'")+"/>"
      end
    end
    # Set of value labels. +labels+ maps each data value to its label text,
    # e.g. {1=>"Si", 2=>"No"}.
    class LabelSet
      def initialize(labels)
        @labels=labels
      end
      # <valueLabelSet> XML for the variable called +name+.
      #
      # The hash key is the value and the hash value is the label, exactly as
      # in parse_spss. The two attributes used to be emitted the other way
      # round.
      def parse_xml(name)
        "<valueLabelSet>\n   "+@labels.collect{|key,value| "<valueLabel label='#{value}' value='#{key}' />"}.join("\n   ")+"\n   <valueLabelVariable name='#{name}' />\n</valueLabelSet>"
      end
      # One "value 'label'" pair per line
      def parse_spss()
        @labels.collect{|key,value| "#{key} '#{value}'"}.join("\n   ")
      end
    end
    # A variable of the dictionary, rendered as XML or as SPSS syntax.
    # Note the attribute is spelled +aligment+; the name is part of the public
    # interface and is kept as is.
    class Variable < Element
      attr_accessor :aligment, :display_width, :label, :measurement_level, :name, :type, :decimals, :width, :type_format, :labelset, :missing_values
      # Applies the defaults and then +config+. Default names and labels are
      # numbered with a counter shared by all variables.
      def initialize(config={})
        super
        @@var_number||=1
        init_with({
          :aligment           =>  "left",
          :display_width      =>  8,
          :label              =>  "Variable #{@@var_number}",
          :measurement_level  =>  "SCALE",
          :name               =>  "var#{@@var_number}",
          :type               =>  0,
          :decimals           =>  2,
          :width              =>  10,
          :type_format        =>  "F",
          :labelset           => nil
        })
        init_with config
        # Only fall back to an empty list: assigning [] unconditionally here
        # used to discard a :missing_values entry given in +config+.
        @missing_values||=[]
        @@var_number+=1
      end
      # <variable> tag with its format, children and missing values, followed
      # by the label set (if any).
      def to_xml
        labelset_s=(@labelset.nil?) ? "":"\n"+@labelset.parse_xml(@name)
        missing_values=(@missing_values.size>0) ? @missing_values.collect {|m| m.to_xml}.join("\n"):""
        "<variable aligment='#{@aligment}' displayWidth='#{@display_width}' label='#{@label}' measurementLevel='#{@measurement_level}' name='#{@name}' type='#{@type}'>\n<variableFormat decimals='#{@decimals}' width='#{@width}' type='#{@type_format}' />\n"+parse_elements(:to_xml)+missing_values+"</variable>"+labelset_s
      end
      # SPSS syntax for the variable, one command per line.
      #
      # The alignment command is now spelled VARIABLE ALIGNMENT (it was
      # emitted as "ALIGMENT"). Every command, including the optional
      # VALUE LABELS and MISSING VALUES ones, ends with a newline so that two
      # commands never share a line.
      def to_spss
        out=[
          "VARIABLE LABELS #{@name} '#{label}' .",
          "VARIABLE ALIGNMENT #{@name} (#{@aligment.upcase}) .",
          "VARIABLE WIDTH #{@name} (#{@display_width}) .",
          "VARIABLE LEVEL #{@name} (#{@measurement_level.upcase}) ."
        ].join("\n")+"\n"
        if !@labelset.nil?
            out << "VALUE LABELS #{@name} "+labelset.parse_spss()+" .\n"
        end
        if @missing_values.size>0
            out << "MISSING VALUES #{@name} ("+@missing_values.collect{|m| m.data}.join(",")+") .\n"
        end
        out
      end
    end
  end
end
# Builds a small two-variable dictionary and writes its SPSS syntax to
# dic_spss.sps in the current directory.
# NOTE(review): this is top-level code, so it runs whenever the file is
# loaded -- confirm whether that is intended or leftover demo code.
dictionary = SPSS::Dictionary::Dictionary.new
yes_no_labels = SPSS::Dictionary::LabelSet.new({1=>"Si",2=>"No"})

labelled_var = SPSS::Dictionary::Variable.new
labelled_var.labelset = yes_no_labels

missing_marker = SPSS::Dictionary::MissingValue.new("-99")
var_with_missing = SPSS::Dictionary::Variable.new

dictionary.add(labelled_var)
dictionary.add(var_with_missing)
var_with_missing.missing_values = [missing_marker]

File.open("dic_spss.sps","wb") do |file|
  file.puts dictionary.to_spss
end


================================================
FILE: lib/statsample/analysis/suite.rb
================================================
module Statsample
  module Analysis
    # A named analysis: a stored block that is run against this object, so the
    # Statsample::Shorthand helpers and the fields of any attached dataset can
    # be called as bare methods inside the block.
    class Suite 
      include Statsample::Shorthand
      # Object that receives the text written by +echo+ and +desc+
      attr_accessor :output
      # Name of the analysis
      attr_accessor :name
      # Block holding the analysis code
      attr_reader :block
      # opts:: Hash with optional :name and :output keys; any non-Hash value
      #        is taken as the name. Output defaults to STDOUT.
      def initialize(opts=Hash.new, &block)
        opts={:name=>opts} unless opts.is_a? Hash
        @block=block
        @name=opts[:name] || "Analysis #{Time.now}"
        @attached=[]
        @output=opts[:output] || ::STDOUT
      end
      # Run the analysis. A block without parameters is evaluated with this
      # suite as +self+; otherwise the suite is passed to the block.
      def run
         @block.arity<1 ? instance_eval(&@block) : @block.call(self)
      end
      # Writes a description of the procedure to the output.
      def desc(d)
        @output.puts("Description:")
        @output.puts("  #{d}") 
      end
      # Writes the arguments to the output.
      def echo(*args)
        @output.puts(*args)
      end
      # Calls +summary+ on the object and returns the result; nothing is
      # written to the output.
      def summary(obj)
        obj.summary
      end
      # Builds a SuiteReportBuilder with the same name and block, writing into
      # the given ReportBuilder object.
      def add_to_reportbuilder(rb)
        SuiteReportBuilder.new({:name=>name, :rb=>rb}, &block)
      end
      
      # Runs the block through a SuiteReportBuilder and saves it to +filename+.
      def generate(filename)
        ar=SuiteReportBuilder.new({:name=>name}, &block)
        ar.generate(filename)
      end
      # Runs the block through a SuiteReportBuilder and returns its text.
      def to_text
        ar=SuiteReportBuilder.new({:name=>name}, &block)
        ar.to_text
      end
      
      # Makes the fields of +ds+ callable as methods inside the block.
      def attach(ds)
        @attached.push(ds)
      end
      # Detaches +ds+, or the most recently attached dataset when no argument
      # is given.
      def detach(ds=nil)
        if ds.nil?
          @attached.pop
        else
          @attached.delete(ds)
        end
      end
      # Keep the inherited graph builders reachable under old_* names before
      # they are overridden below.
      alias :old_boxplot :boxplot
      alias :old_histogram :histogram
      alias :old_scatterplot :scatterplot

      # Writes +svg+ to a temporary file and opens it with Safari on darwin
      # platforms or with xdg-open elsewhere. Returns the viewer command's
      # standard output.
      def show_svg(svg)
        require 'tmpdir'
        require 'shellwords'
        fn=Dir.tmpdir+"/image_#{Time.now.to_f}.svg"
        File.open(fn,"w") {|fp| fp.write svg}
        # Escape the path: the temporary directory may contain spaces or
        # other characters the shell would interpret.
        escaped=Shellwords.escape(fn)
        if RUBY_PLATFORM =~/darwin/
          %x(open -a safari #{escaped})
        else
          %x(xdg-open #{escaped})
        end
      end
      def boxplot(*args)
        show_svg(old_boxplot(*args).to_svg)
      end
      def histogram(*args)
        show_svg(old_histogram(*args).to_svg)
      end
      def scatterplot(*args)
        show_svg(old_scatterplot(*args).to_svg)
      end
      
      # Resolves unknown methods as fields of the attached datasets, searching
      # the most recently attached dataset first. Raises a RuntimeError when
      # no dataset has such a field.
      def method_missing(name, *args,&block)
        field=name.to_s
        @attached.reverse.each do |ds|
          return ds[field] if ds.fields.include?(field)
        end
        raise "Method #{name} doesn't exists"
      end
      # Keeps respond_to? consistent with method_missing: true when an
      # attached dataset has a field with that name.
      def respond_to_missing?(name, include_private=false)
        field=name.to_s
        Array(@attached).any? {|ds| ds.fields.include?(field)} || super
      end
    end
  end
end


================================================
FILE: lib/statsample/analysis/suitereportbuilder.rb
================================================
module Statsample
  module Analysis
    # Suite variant that adds everything to a ReportBuilder object instead of
    # writing to an output stream or opening a viewer.
    class SuiteReportBuilder < Suite
      # ReportBuilder object that collects the output
      attr_accessor :rb
      # opts:: same as Suite, plus an optional :rb key with the ReportBuilder
      #        object to use; a new one named after the suite is created
      #        otherwise.
      def initialize(opts=Hash.new,&block)
        if !opts.is_a? Hash
          opts={:name=>opts}
        end        
        super(opts,&block)
        @rb=opts[:rb] || ReportBuilder.new(:name=>name)
      end
      # Runs the block (if any) and saves the report to +filename+.
      def generate(filename)
        run if @block
        @rb.save(filename)
      end
      # Runs the block (if any) and returns the report as text.
      def to_text
        run if @block
        @rb.to_text
      end
      # Adds the object itself to the report.
      def summary(o)
        @rb.add(o)
      end
      def desc(d)
        @rb.add(d)
      end
      def echo(*args)
        args.each do |a|
          @rb.add(a)
        end
      end
      
      # The graph helpers add the graph object to the report.
      def boxplot(*args)
        @rb.add(old_boxplot(*args))
      end
      def histogram(*args)
        @rb.add(old_histogram(*args))
      end
      # This used to be a second definition of boxplot, so scatterplot fell
      # back to Suite#scatterplot, which opens a viewer instead of adding the
      # graph to the report.
      def scatterplot(*args)
        @rb.add(old_scatterplot(*args))
      end
      
    end
  end
end


================================================
FILE: lib/statsample/analysis.rb
================================================
require 'statsample/analysis/suite'
require 'statsample/analysis/suitereportbuilder'

module Statsample
  # DSL to create analyses without hassle.
  # * Shortcut methods that avoid writing complete namespaces, many based on R
  # * Attach/detach vectors to workspace, like R
  # == Example
  #  an1=Statsample::Analysis.store(:first) do
  #    # Load excel file with x,y,z vectors
  #    ds=excel('data.xls')
  #    # See variables on ds dataset
  #    names(ds) 
  #    # Attach the vectors to workspace, like R
  #    attach(ds)
  #    # vector 'x' is attached to workspace like a method,
  #    # so you can use like any variable
  #    mean,sd=x.mean, x.sd 
  #    # Shameless R robbery
  #    a=c(1..10)
  #    b=c(21..30)
  #    summary(cor(ds)) # Call summary method on correlation matrix
  #  end
  #  # You can run the analysis by its name
  #  Statsample::Analysis.run(:first)
  #  # or using the returned variables
  #  an1.run
  #  # You can also generate a report using ReportBuilder.
  #  # .summary() method call 'report_building' on the object, 
  #  # instead of calling text summary
  #  an1.generate("report.html")
  module Analysis
    # Stored suites, keyed by the name given to +store+
    @@stored_analysis={}
    # Name of the most recently stored analysis
    @@last_analysis=nil
    # Remove every stored analysis
    def self.clear_analysis
      @@stored_analysis.clear
    end
    # Hash with the stored analyses
    def self.stored_analysis
      @@stored_analysis
    end
    # Most recently stored analysis
    def self.last
      @@stored_analysis[@@last_analysis]
    end
    # Store the block as a Suite under +name+ and return the suite.
    # +opts+ is passed to Suite.new; :name defaults to +name+.
    # Raises if no block is given.
    def self.store(name, opts=Hash.new,&block)
      raise "You should provide a block" if !block
      @@last_analysis=name
      opts={:name=>name}.merge(opts)
      @@stored_analysis[name]=Suite.new(opts,&block)
    end
    # Run analysis +*args+
    # Without arguments, run all stored analysis
    # Only 'echo' will be returned to screen
    def self.run(*args)
      args=stored_analysis.keys if args.size==0
      raise "Analysis #{args} doesn't exists" if (args - stored_analysis.keys).size>0
      args.each do |name|
        stored_analysis[name].run
      end
    end

    # Add analysis +*args+ to a reportbuilder object.
    # Without arguments, add all stored analysis
    # Each analysis is wrapped inside a ReportBuilder::Section object
    # This is the method used by save() and to_text()
    
    def self.add_to_reportbuilder(rb, *args)
      args=stored_analysis.keys if args.size==0
      # Report the requested analyses, as +run+ does. The message used to
      # interpolate +name+, which at this point is the module's own name.
      raise "Analysis #{args} doesn't exists" if (args - stored_analysis.keys).size>0
      args.each do |name|
        section=ReportBuilder::Section.new(:name=>stored_analysis[name].name)
        rb_an=stored_analysis[name].add_to_reportbuilder(section)
        rb.add(section)        
        rb_an.run
      end
    end
    
    # Save the analysis on a file
    # Without arguments, add all stored analysis    
    def self.save(filename, *args)
      rb=ReportBuilder.new(:name=>filename)
      add_to_reportbuilder(rb, *args)
      rb.save(filename)
    end
    
    # Run analysis and return as string
    # output of echo callings
    # Without arguments, add all stored analysis
    
    def self.to_text(*args)
      rb=ReportBuilder.new(:name=>"Analysis #{Time.now}")
      add_to_reportbuilder(rb, *args)
      rb.to_text
    end
    # Run analysis and return to screen all
    # echo and summary callings
    def self.run_batch(*args)
      puts to_text(*args)
    end    
  end
end


================================================
FILE: lib/statsample/anova/contrast.rb
================================================
module Statsample
  module Anova
    # Contrast between the means of the vectors of a one-way ANOVA.
    # The contrast is given either as a coefficient vector (:c) or as two
    # lists of vector indexes (:c1 and :c2).
    class Contrast
      # Within-groups mean square of the one-way ANOVA of the vectors
      attr_reader :msw
      include Summarizable
      # Options:
      # [:vectors]   vectors to contrast (required)
      # [:c]         contrast coefficients
      # [:c1 - :c2]  indexes used to build the coefficients
      # [:t_options] options passed to the t test
      # [:name]      name of the contrast
      def initialize(opts=Hash.new)
        raise "Should set at least vectors options" if opts[:vectors].nil?
        @vectors=opts[:vectors]
        @c=opts[:c]
        @c1,@c2=opts[:c1], opts[:c2]
        @t_options=opts[:t_options] || {:estimate_name=>_("Psi estimate")}
        @name=opts[:name] || _("Contrast")
        @psi=nil
        @anova=Statsample::Anova::OneWayWithVectors.new(@vectors)
        @msw=@anova.msw
      end
      # Hypothesis contrast, selecting index for each contrast
      # For example, if you want to contrast x_0 against x_1 and x_2
      # you should use
      # c.c_by_index([0],[1,2])
      def c_by_index(c1,c2)
        contrast=[0]*@vectors.size
        c1.each {|i| contrast[i]=1.quo(c1.size)}
        c2.each {|i| contrast[i]=-1.quo(c2.size)}
        @c=contrast
        c(contrast)
      end
      # Contrast estimate, computed on first use from :c or from :c1/:c2.
      # Returns nil when neither was provided.
      def psi
        if @psi.nil?
          c(@c) if @c
          c_by_index(@c1,@c2) if (@c1 and @c2)
        end
        @psi
      end
      def confidence_interval(cl=nil)
        t_object.confidence_interval(cl)
      end
      # Hypothesis contrast, using custom values
      # Every parameter is a contrast value. You should use
      # the same number of contrasts as vectors on class and the sum
      # of contrasts should be 0.
      # Without arguments, returns the current contrast coefficients.
      def c(args=nil)
        if args.nil?
          # With only :c1/:c2 given the coefficients do not exist until psi
          # builds them; do that here so report_building and standard_error
          # never see nil coefficients.
          psi if @c.nil?
          return @c
        end
        @c=args
        raise "contrast number!=vector number" if args.size!=@vectors.size
        #raise "Sum should be 0" if args.inject(0) {|ac,v| ac+v}!=0
        @psi=args.size.times.inject(0) {|ac,i| ac+(args[i]*@vectors[i].mean)}
      end
      # Square root of msw times the sum of c_i^2/n_i
      def standard_error
        coefficients=c
        sum=@vectors.size.times.inject(0) {|ac,i|
          ac+((coefficients[i].rationalize**2).quo(@vectors[i].size))
        } 
        Math.sqrt(@msw*sum)
      end
      alias :se :standard_error
      # Sum of the vector sizes minus the number of vectors
      def df
        @vectors.inject(0) {|ac,v| ac+v.size}-@vectors.size
      end
      # t test object built from psi, the standard error and df
      def t_object
        Statsample::Test::T.new(psi, se, df, @t_options)
      end
      def t
        t_object.t
      end
      def probability
        t_object.probability
      end
      def report_building(builder)
         builder.section(:name=>@name) do |s|
           s.text _("Contrast:%s") % c.join(",")
           s.parse_element(t_object)
         end
      end
    end
  end
end


================================================
FILE: lib/statsample/anova/oneway.rb
================================================
module Statsample
  module Anova
    # = Generic Anova one-way.
    # You could enter the sum of squares or the mean squares. You
    # should enter the degrees of freedom for numerator and denominator.
    # == Usage
    #  anova=Statsample::Anova::OneWay.new(:ss_num=>10,:ss_den=>20, :df_num=>2, :df_den=>10, :name=>"ANOVA for....")
    class OneWay
      include Summarizable
      attr_reader :df_num, :df_den, :ss_num, :ss_den, :ms_num, :ms_den, :ms_total, :df_total, :ss_total
      # Name of the ANOVA analysis
      attr_accessor :name
      # Label of the denominator row on the summary table
      attr_accessor :name_denominator
      # Label of the numerator row on the summary table
      attr_accessor :name_numerator
      # Options:
      # [:df_num, :df_den] degrees of freedom (required)
      # [:ss_num, :ss_den] sums of squares, or
      # [:ms_num, :ms_den] mean squares
      # [:name, :name_numerator, :name_denominator] labels
      #
      # Raises ArgumentError when the degrees of freedom are missing, or when
      # neither the sums of squares nor the mean squares are given.
      def initialize(opts=Hash.new)
        @name=@name_numerator=@name_denominator=nil
        # Work on a copy: the keys consumed below must not disappear from the
        # caller's hash.
        opts=opts.dup
        
        # First see if sum of squares or mean squares are entered
        raise ArgumentError, "You should set d.f." unless (opts.has_key? :df_num and opts.has_key? :df_den)
        @df_num=opts.delete :df_num
        @df_den=opts.delete :df_den
        @df_total=@df_num+@df_den
        if(opts.has_key? :ss_num and opts.has_key? :ss_den)
          @ss_num = opts.delete :ss_num
          @ss_den =opts.delete :ss_den
          @ms_num =@ss_num.quo(@df_num)
          @ms_den =@ss_den.quo(@df_den) 
        elsif (opts.has_key? :ms_num and opts.has_key? :ms_den)
          @ms_num =opts.delete :ms_num
          @ms_den =opts.delete :ms_den
          @ss_num =@ms_num * @df_num
          # Was @ss_den * @df_den, which multiplied a value that is still nil
          @ss_den =@ms_den * @df_den
        else
          raise ArgumentError, "You should set sum of squares (:ss_num, :ss_den) or mean squares (:ms_num, :ms_den)"
        end
        @ss_total=@ss_num+@ss_den
        # NOTE(review): kept as the sum of both mean squares; a total mean
        # square is conventionally ss_total/df_total -- confirm the intent.
        @ms_total=@ms_num+@ms_den
        # The numerator of F is the explained variance and the denominator
        # the unexplained one; the two default labels used to be swapped.
        opts_default={:name=>"ANOVA",
                      :name_numerator=>_("Explained variance"),
                      :name_denominator=>_("Unexplained variance")}
        @opts=opts_default.merge(opts)
        # Iterate over the merged options so the defaults are applied too;
        # iterating over +opts+ only assigned what the caller had passed.
        @opts.keys.each {|k|
          send("#{k}=", @opts[k]) if self.respond_to? "#{k}="
        }
        @f_object=Statsample::Test::F.new(@ms_num, @ms_den, @df_num,@df_den)
      end
      # F value
      def f
        @f_object.f
      end
      # P-value of F test
      def probability
        @f_object.probability
      end
      def report_building(builder) #:nodoc:
        builder.section(:name=>@name) do |b|
          report_building_table(b)
        end
      end
      def report_building_table(builder) #:nodoc:
        builder.table(:name=>_("%s Table") % @name, :header=>%w{source ss df ms f p}.map {|v| _(v)}) do |t|
          t.row([@name_numerator, sprintf("%0.3f",@ss_num),   @df_num, sprintf("%0.3f",@ms_num),  sprintf("%0.3f",f), sprintf("%0.3f", probability)])
          t.row([@name_denominator, sprintf("%0.3f",@ss_den),  @df_den, sprintf("%0.3f",@ms_den), "", ""])
          t.row([_("Total"), sprintf("%0.3f",@ss_total),  @df_total, sprintf("%0.3f",@ms_total),"",""])
        end
      end

    end
    
    # One Way Anova with vectors
    # Example:
    #   v1=[2,3,4,5,6].to_scale
    #   v2=[3,3,4,5,6].to_scale
    #   v3=[5,3,1,5,6].to_scale
    #   anova=Statsample::Anova::OneWayWithVectors.new([v1,v2,v3])
    #   anova.f
    #   => 0.0243902439024391
    #   anova.probability
    #   => 0.975953044203438
    #   anova.sst 
    #   => 32.9333333333333
    #
    class OneWayWithVectors < OneWay
      # Show on summary Levene test
      attr_accessor :summary_levene
      # Show on summary descriptives for vectors
      attr_accessor :summary_descriptives
      # Show on summary of contrasts
      attr_accessor :summary_contrasts
      # Array with stored contrasts
      attr_reader :contrasts
      
      # Accepts either an Array of vectors or the vectors as separate
      # arguments, optionally followed by a Hash of options
      # (:name, :name_numerator, :name_denominator, :summary_descriptives,
      # :summary_levene, :summary_contrasts).
      def initialize(*args)
        if args[0].is_a? Array
          @vectors=args.shift
        else
          @vectors=args.find_all {|v| v.is_a? Statsample::Vector}
        end
        # Look for the options in both call forms; they used to be ignored
        # when the vectors were passed as an Array.
        opts=args.find {|v| v.is_a? Hash} || Hash.new
        opts_default={:name=>_("Anova One-Way"), 
                      :name_numerator=>_("Between Groups"),
                      :name_denominator=>_("Within Groups"),
                      :summary_descriptives=>false,
                      :summary_levene=>true,
                      :summary_contrasts=>true
        }
        @opts=opts_default.merge(opts).merge(:ss_num=>ssbg, :ss_den=>sswg, :df_num=>df_bg, :df_den=>df_wg)
        @contrasts=[]
        super(@opts)
      end
      alias :sst :ss_total 
      alias :msb :ms_num
      alias :msw :ms_den
      
      # Generates and store a contrast.
      # Options should be provided as a hash
      # [:c]=>contrast vector
      # [:c1 - :c2]=>index for automatic construction of contrast
      # [:name]=>contrast name
      
      def contrast(opts=Hash.new)
        name=opts[:name] || _("Contrast for %s") % @name
        opts=opts.merge({:vectors=>@vectors, :name=>name})
        c=Statsample::Anova::Contrast.new(opts)
        @contrasts.push(c)
        c
      end
      
      # Levene test for the vectors
      def levene
        Statsample::Test.levene(@vectors, :name=>_("Test of Homogeneity of variances (Levene)"))
      end
      # Total mean
      def total_mean
        sum=@vectors.inject(0){|a,v| a+v.sum}
        sum.quo(n)
      end
      # Sum of squares within groups
      def sswg
        @sswg||=@vectors.inject(0) {|total,vector| total+vector.ss }
      end
      # Sum of squares between groups
      def ssbg
        m=total_mean
        @vectors.inject(0) do |total,vector|
          total + (vector.mean-m).square * vector.size 
        end
      end
      # Degrees of freedom within groups
      def df_wg
        @df_wg||=n-k
      end
      # Number of vectors
      def k
        @k||=@vectors.size
      end
      # Degrees of freedom between groups 
      def df_bg
          k-1
      end
      # Total number of cases
      def n
          @vectors.inject(0){|a,v| a+v.size}
      end
      def report_building(builder) # :nodoc:
        builder.section(:name=>@name) do |s|
          if summary_descriptives
            s.table(:name=>_("Descriptives"),:header=>%w{Name N Mean SD Min Max}.map {|v| _(v)}) do |t|
              @vectors.each do |v|
                t.row [v.name, v.n_valid, "%0.4f" % v.mean, "%0.4f" %  v.sd, "%0.4f" % v.min, "%0.4f" % v.max]
              end
            end
          end
          
          if summary_levene
            s.parse_element(levene)
          end
          report_building_table(s)
          if summary_contrasts and @contrasts.size>0

            @contrasts.each do |c|
              s.parse_element(c)
            end
          end
          
        end
      end
    end
  end
end


================================================
FILE: lib/statsample/anova/twoway.rb
================================================
module Statsample
  module Anova
    # = Generic Anova two-way.
    # You can enter either the sums of squares or the mean squares for a, b, axb and within.
    # You must enter the degrees of freedom for a, b and within; df_axb is computed as df_a*df_b.
    # == Usage
    #  anova=Statsample::Anova::TwoWay.new(:ss_a=>10, :ss_b=>20, :ss_axb=>10, :ss_within=>20, :df_a=>2, :df_b=>3, :df_within=>100, :name=>"ANOVA for....")
    class TwoWay
      include Summarizable
      attr_reader :df_a, :df_b, :df_axb, :df_within, :df_total
      attr_reader :ss_a, :ss_b, :ss_axb, :ss_within, :ss_total
      attr_reader :ms_a, :ms_b, :ms_axb, :ms_within, :ms_total
      # Name of ANOVA Analysis
      attr_accessor :name
      # Name of a factor
      attr_accessor :name_a
      # Name of b factor
      attr_accessor :name_b
      # Name of within factor
      attr_accessor :name_within
      
      # F test objects (Statsample::Test::F) for a, b and the axb interaction
      attr_reader :f_a_object, :f_b_object, :f_axb_object
      # Builds the analysis from an options hash.
      # Required keys: :df_a, :df_b, :df_within, plus either all of
      # :ss_a, :ss_b, :ss_axb, :ss_within or all of
      # :ms_a, :ms_b, :ms_axb, :ms_within.
      # Optional keys: :name, :name_a, :name_b, :name_within.
      # Raises ArgumentError when a d.f. key is missing and RuntimeError when
      # neither the complete ss set nor the complete ms set is given.
      def initialize(opts=Hash.new)
        # First see if sum of squares or mean squares are entered
        raise ArgumentError, "You should set all d.f." unless [:df_a, :df_b, :df_within].all? {|v| opts.has_key? v}
        # Keys are consumed with delete below: work on a copy so the
        # caller's hash is left intact.
        opts=opts.dup
        
        @df_a=opts.delete :df_a
        @df_b=opts.delete :df_b
        @df_axb=@df_a*@df_b
        @df_within=opts.delete :df_within
        @df_total=@df_a+@df_b+@df_axb+@df_within
        
        if [:ss_a, :ss_b, :ss_axb, :ss_within].all? {|v| opts.has_key? v}
          @ss_a = opts.delete :ss_a
          @ss_b = opts.delete :ss_b
          @ss_axb = opts.delete :ss_axb
          @ss_within = opts.delete :ss_within
          
          # Mean squares derived from sums of squares
          @ms_a =@ss_a.quo(@df_a)
          @ms_b =@ss_b.quo(@df_b) 
          @ms_axb =@ss_axb.quo(@df_axb)
          @ms_within =@ss_within.quo(@df_within) 

        elsif [:ms_a, :ms_b, :ms_axb, :ms_within].all? {|v| opts.has_key? v}
          @ms_a = opts.delete :ms_a
          @ms_b = opts.delete :ms_b
          @ms_axb = opts.delete :ms_axb
          @ms_within = opts.delete :ms_within
          
          # Sums of squares derived from mean squares
          @ss_a =@ms_a*@df_a
          @ss_b =@ms_b*@df_b 
          @ss_axb =@ms_axb*@df_axb
          @ss_within =@ms_within*@df_within
        else
          raise "You should set all ss or ms"
        end
        @ss_total=@ss_a+@ss_b+@ss_axb+@ss_within
        # ms_total is the plain sum of the four mean squares
        @ms_total=@ms_a+@ms_b+@ms_axb+@ms_within
        opts_default={:name=>_("ANOVA Two-Way"),
                      :name_a=>_("A"),
                      :name_b=>_("B"),
                      :name_within=>_("Within")                      
        }
        @opts=opts_default.merge(opts)
        # Only the keys listed in opts_default are applied through setters
        opts_default.keys.each {|k|
          send("#{k}=", @opts[k])
        }
        # Every effect is tested against the within mean square
        @f_a_object=Statsample::Test::F.new(@ms_a,@ms_within,@df_a,@df_within)
        @f_b_object=Statsample::Test::F.new(@ms_b,@ms_within,@df_b,@df_within)
        @f_axb_object=Statsample::Test::F.new(@ms_axb,@ms_within,@df_axb,@df_within)
      end
      # F value for factor a
      def f_a
        @f_a_object.f
      end
      # F value for factor b
      def f_b
        @f_b_object.f
      end
      # F value for the axb interaction
      def f_axb
        @f_axb_object.f
      end
      # Probability reported by the F test object of factor a
      def f_a_probability
        @f_a_object.probability
      end
      # Probability reported by the F test object of factor b
      def f_b_probability
        @f_b_object.probability
      end
      # Probability reported by the F test object of the axb interaction
      def f_axb_probability
        @f_axb_object.probability
      end
            

      def report_building(builder) #:nodoc:
        builder.section(:name=>@name) do |b|
          report_building_table(b)
        end
      end
      # Adds the ANOVA table to +builder+: one row each for a, b, axb,
      # within and total.
      def report_building_table(builder) #:nodoc:
        builder.table(:name=>_("%s Table") % @name, :header=>%w{source ss df ms f p}.map {|v| _(v)}) do |t|
          t.row([@name_a, "%0.3f" % @ss_a,   @df_a, "%0.3f" % @ms_a , "%0.3f" % f_a, "%0.4f" % f_a_probability] )
          t.row([@name_b, "%0.3f" % @ss_b,   @df_b, "%0.3f" % @ms_b , "%0.3f" % f_b, "%0.4f" % f_b_probability] )
          t.row(["%s X %s" % [@name_a, @name_b], "%0.3f" % @ss_axb,   @df_axb, "%0.3f" % @ms_axb , "%0.3f" % f_axb, "%0.4f" % f_axb_probability] )          
          t.row([@name_within, "%0.3f" % @ss_within,   @df_within, nil,nil,nil] )
          t.row([_("Total"), "%0.3f" % @ss_total,   @df_total, nil,nil,nil] )          
        end
      end
    end
    
    # Two Way Anova with vectors
    # Example:
    #   v1=[1,1,2,2].to_scale
    #   v2=[1,2,1,2].to_scale
    #   v3=[5,3,1,5].to_scale
    #   anova=Statsample::Anova::TwoWayWithVectors.new(:a=>v1,:b=>v2, :dependent=>v3)
    #
    class TwoWayWithVectors < TwoWay
       # Show summary Levene test
      attr_accessor :summary_levene
      # Show summary descriptives for variables (means)
      attr_accessor :summary_descriptives
      attr_reader :a_var, :b_var, :dep_var
      # Builds the analysis from the vectors given as :a, :b and :dependent.
      # Optional keys: :name, :name_a, :name_b, :name_within,
      # :summary_descriptives (default true) and :summary_levene
      # (default false).
      # For now, only equal sample cells allowed: a RuntimeError is raised
      # when two a x b cells differ in size.
      def initialize(opts=Hash.new)
        raise "You should insert at least :a, :b and :dependent" unless  [:a, :b, :dependent].all? {|v| opts.has_key? v}
        @a_var='a'
        @b_var='b'
        @dep_var='dependent'
        @a_vector, @b_vector, @dep_vector=Statsample.only_valid_clone opts[:a], opts[:b], opts[:dependent]
        
        ds={@a_var=>@a_vector, @b_var=>@b_vector, @dep_var=>@dep_vector}.to_dataset
        @ds=ds.clone_only_valid
        # Number of levels of each factor
        _p=@a_vector.factors.size
        _q=@b_vector.factors.size
        @x_general=@dep_vector.mean
        @axb_means={}
        @axb_sd={}
        @vectors=[]
        # Cell size, taken from the first cell and required for all others
        n=nil
        @ds.to_multiset_by_split(a_var,b_var).each_vector(dep_var) {|k,v|
          @axb_means[k]=v.mean
          @axb_sd[k]=v.sd
          @vectors << v
          n||=v.size
          raise "All cell sizes should be equal" if n!=v.size
        }

        @a_means={}
        @ds.to_multiset_by_split(a_var).each_vector(dep_var) {|k,v|
          @a_means[k]=v.mean
        }
        @b_means={}
        @ds.to_multiset_by_split(b_var).each_vector(dep_var) {|k,v|
          @b_means[k]=v.mean
        }
        ss_a=n*_q*@ds[a_var].factors.inject(0) {|ac,v|
          ac+(@a_means[v]-@x_general)**2
        }
        ss_b=n*_p*@ds[b_var].factors.inject(0) {|ac,v|
          ac+(@b_means[v]-@x_general)**2
        }
        ss_within=@ds.collect {|row|
          (row[dep_var]-@axb_means[[row[a_var],row[b_var]]])**2
        }.sum
        ss_axb=n*@axb_means.inject(0) {|ac,v|
          j,k=v[0]
          xjk=v[1]
          ac+(xjk-@a_means[j]-@b_means[k]+@x_general)**2
        }
        df_a=_p-1
        df_b=_q-1
        df_within=(_p*_q)*(n-1)
        
        opts_default={:name=>_("Anova Two-Way on %s") % @ds[dep_var].name, 
          :name_a=>@ds[a_var].name,
            :name_b=>@ds[b_var].name,
            :summary_descriptives=>true,
            :summary_levene=>false}
            
        @opts=opts_default.merge(opts).merge({:ss_a=>ss_a,:ss_b=>ss_b, :ss_axb=>ss_axb, :ss_within=>ss_within, :df_a=>df_a, :df_b=>df_b, :df_within=>df_within})
        # The parent constructor only applies its own option keys (the
        # names), so keep the summary flags aside and apply them afterwards.
        show_descriptives=@opts[:summary_descriptives]
        show_levene=@opts[:summary_levene]
        
        super(@opts)
        @summary_descriptives=show_descriptives
        @summary_levene=show_levene
      end
      # Runs Statsample::Test.levene on the a x b cell vectors
      def levene
        Statsample::Test.levene(@vectors, :name=>_("Test of Homogeneity of variances (Levene)"))
      end      
      # Writes the analysis into +builder+: an optional table of cell and
      # marginal means, an optional Levene test and the ANOVA table.
      def report_building(builder) #:nodoc:#
        builder.section(:name=>@name) do |s|
          if summary_descriptives
            s.table(:header =>['']+@ds[a_var].factors.map {|a| @ds[a_var].labeling(a)}+[_("%s Mean") % @name_b]) do |t|
              @ds[b_var].factors.each do |b|
                t.row([@ds[b_var].labeling(b)]+@ds[a_var].factors.map {|a| "%0.3f" % @axb_means[[a,b]] } + ["%0.3f" % @b_means[b]])
              end
              t.row([_("%s Mean") % @name_a]+@ds[a_var].factors.map {|a| "%0.3f" % @a_means[a]}+ ["%0.3f" % @x_general])
            end
          end
          if summary_levene
            s.parse_element(levene)
          end
          report_building_table(s)

        end
      end
    end
  end
end


================================================
FILE: lib/statsample/anova.rb
================================================
module Statsample
  # Factory shortcuts for the ANOVA classes. Each method forwards its
  # arguments unchanged to the constructor of the matching class.
  module Anova
    class << self
      # Same as Statsample::Anova::OneWay.new
      def oneway(*params)
        OneWay.new(*params)
      end

      # Same as Statsample::Anova::TwoWay.new
      def twoway(*params)
        TwoWay.new(*params)
      end

      # Same as Statsample::Anova::OneWayWithVectors.new
      def oneway_with_vectors(*params)
        OneWayWithVectors.new(*params)
      end

      # Same as Statsample::Anova::TwoWayWithVectors.new
      def twoway_with_vectors(*params)
        TwoWayWithVectors.new(*params)
      end
    end
  end
end

require 'statsample/anova/oneway'
require 'statsample/anova/contrast'
require 'statsample/anova/twoway'


================================================
FILE: lib/statsample/bivariate/pearson.rb
================================================
module Statsample
  module Bivariate
    # = Pearson correlation coefficient (r) 
    # 
    # The product-moment Pearson's correlation coefficient, known as 'r',
    # is a measure of bivariate association between two continuous
    # variables.
    # 
    # == Usage
    #   a = [1,2,3,4,5,6].to_scale
    #   b = [2,3,4,5,6,7].to_scale
    #   pearson = Statsample::Bivariate::Pearson.new(a,b)
    #   puts pearson.r
    #   puts pearson.t
    #   puts pearson.probability
    #   puts pearson.summary
    # 
    class Pearson
      
      include Statsample::Test
      include Summarizable
      # Name of correlation
      attr_accessor :name
      # Tails for probability (:both, :left or :right)
      attr_accessor :tails     
      # Number of cases left after Statsample.only_valid_clone
      attr_accessor :n      
      # v1, v2:: vectors to correlate; only the cases kept by
      #          Statsample.only_valid_clone are used.
      # opts::   optional hash; keys with a matching writer (:name, :tails,
      #          :n) override the defaults, other keys are ignored.
      def initialize(v1,v2,opts=Hash.new)
        @v1_name,@v2_name = v1.name,v2.name
        @v1,@v2           = Statsample.only_valid_clone(v1,v2)
        @n=@v1.size
        opts_default={
          :name=>_("Correlation (%s - %s)") % [@v1_name, @v2_name],
          :tails=>:both
        }
        # Caller options must win over the defaults, so the defaults are
        # the receiver of the merge.
        @opts=opts_default.merge(opts)
        @opts.each{|k,v|
          # Only apply options that actually have a writer
          self.send("#{k}=",v) if self.respond_to? "#{k}="
        }
      end
      # Correlation coefficient, as computed by Statsample::Bivariate.pearson
      def r
        Statsample::Bivariate.pearson(@v1,@v2)
      end
      # t statistic, as computed by Statsample::Bivariate.t_pearson
      def t
        Statsample::Bivariate.t_pearson(@v1,@v2)
      end
      # Probability of t on a T distribution with (valid cases - 2) degrees
      # of freedom, for the configured tails
      def probability
        p_using_cdf(Distribution::T.cdf(t, @v1.size-2), tails)
      end
      # Adds a one-line text summary to +builder+
      def report_building(builder)
        builder.text(_("%s : r=%0.3f (t:%0.3f, g.l.=%d, p:%0.3f / %s tails)") % [@name, r,t, (n-2), probability, tails])
      end
    end
  end
end

================================================
FILE: lib/statsample/bivariate.rb
================================================
require 'statsample/bivariate/pearson'
module Statsample
  # Diverse methods and classes to calculate bivariate relations
  # Specific classes: 
  # * Statsample::Bivariate::Pearson : Pearson correlation coefficient (r)
  # * Statsample::Bivariate::Tetrachoric : Tetrachoric correlation
  # * Statsample::Bivariate::Polychoric  : Polychoric correlation (using joint, two-step and polychoric series)
  module Bivariate
    autoload(:Polychoric, 'statsample/bivariate/polychoric')
    autoload(:Tetrachoric, 'statsample/bivariate/tetrachoric')
    class << self
      # Covariance between two vectors, computed on the cases kept by
      # Statsample.only_valid_clone. Returns nil when nothing is left.
      # Uses GSL when Statsample.has_gsl? is true, covariance_slow otherwise.
      def covariance(v1,v2)
        valid1, valid2 = Statsample.only_valid_clone(v1,v2)
        return nil if valid1.size==0
        return covariance_slow(valid1, valid2) unless Statsample.has_gsl?
        GSL::Stats::covariance(valid1.gsl, valid2.gsl)
      end
      # Log-likelihood of the observed values +real+ given the predicted
      # values +pred+: for every valid case adds
      # real*log(pred) + (1-real)*log(1-pred).
      def maximum_likehood_dichotomic(pred,real)
        predicted, observed = Statsample.only_valid_clone(pred,real)
        log_likelihood = 0
        predicted.each_index do |i|
          hit  = observed[i] * Math.log(predicted[i])
          miss = (1 - observed[i]) * Math.log(1 - predicted[i])
          log_likelihood += hit + miss
        end
        log_likelihood
      end
      
      # Pure Ruby covariance: cross-product sum of squares divided by
      # (valid cases - 1).
      def covariance_slow(v1,v2) # :nodoc:
        valid1, valid2 = Statsample.only_valid(v1,v2)
        cross_products = sum_of_squares(valid1, valid2)
        cross_products / (valid1.size - 1)
      end
      # Sum, over the valid cases, of the products of both vectors'
      # deviations from their own means.
      def sum_of_squares(v1,v2)
        first, second = Statsample.only_valid_clone(v1,v2)
        mean_first  = first.mean
        mean_second = second.mean
        total = 0
        first.size.times do |i|
          total += (first[i] - mean_first) * (second[i] - mean_second)
        end
        total
      end
      # Pearson correlation coefficient (r) between 2 vectors, computed on
      # the cases kept by Statsample.only_valid_clone. Returns nil when
      # nothing is left. Uses GSL when available, pearson_slow otherwise.
      def pearson(v1,v2)
        valid1, valid2 = Statsample.only_valid_clone(v1,v2)
        return nil if valid1.size ==0
        return pearson_slow(valid1, valid2) unless Statsample.has_gsl?
        GSL::Stats::correlation(valid1.gsl, valid2.gsl)
      end
      # Pure Ruby Pearson r: cross-product sum of squares divided by the
      # product of the square roots of each vector's own sum of squares.
      def pearson_slow(v1,v2) # :nodoc:
        valid1, valid2 = Statsample.only_valid_clone(v1,v2)
        cross_products = sum_of_squares(valid1, valid2)
        scale = Math.sqrt(valid1.sum_of_squares) * Math.sqrt(valid2.sum_of_squares)
        cross_products.quo(scale)
      end
      alias :correlation :pearson
      # t statistic for the Pearson correlation between two vectors, used
      # to test the null hypothesis r=0.
      # Returns 0 when r is exactly 1.0; otherwise delegates to t_r with the
      # number of valid cases.
      def t_pearson(v1,v2)
        valid1, valid2 = Statsample.only_valid_clone(v1,v2)
        r = pearson(valid1, valid2)
        return 0 if r==1.0
        t_r(r, valid1.size)
      end
      # t statistic for a Pearson correlation, given r and the vector size:
      # r * sqrt((size-2) / (1-r^2)).
      # Source : http://faculty.chass.ncsu.edu/garson/PA765/correl.htm
      def t_r(r,size)
        degrees = (size - 2).to_f
        unexplained = 1 - r**2
        r * Math.sqrt(degrees / unexplained)
      end
      # Probability value (a la SPSS) for a given t, size and number of
      # tails, read from a T distribution with size-2 degrees of freedom.
      # The third parameter accepts
      # * :both  or 2  : for r!=0 (default)
      # * :right, :positive or 1  : for r > 0
      # * :left, :negative        : for r < 0
      # Any other value is treated as a single, left-hand tail.
      def prop_pearson(t, size, tails=:both)
        tails = case tails
                when 2 then :both
                when 1, :positive then :right
                when :negative then :left
                else tails
                end
        two_sided = (tails==:both)
        # Two-sided test: always read the lower tail and double it
        t = -t if t>0 && two_sided
        cdf = Distribution::T.cdf(t, size-2)
        scaled = cdf * (two_sided ? 2 : 1)
        tails==:right ? 1.0-scaled : scaled
      end
      
      
      # Predicted time for the pairwise correlation matrix, in milliseconds,
      # from the number of variables and cases.
      # See benchmarks/correlation_matrix.rb to see mode of calculation
      def prediction_pairwise(vars,cases)
        root = -0.518111-0.000746*cases+1.235608*vars+0.000740*cases*vars
        (root**2) / 100
      end
      # Predicted time for the optimized correlation matrix, in
      # milliseconds, from the number of variables and cases.
      # See benchmarks/correlation_matrix.rb to see mode of calculation
      def prediction_optimized(vars,cases)
        root = 4+0.018128*cases+0.246871*vars+0.001169*vars*cases
        (root**2) / 100
      end
      # Returns the residual scores of +from+ after removing the variance
      # shared with +del+: for each case, standardized from minus
      # r * standardized del. Cases where either standardized value is nil
      # yield nil. The result is a :scale vector.
      def residuals(from,del)
        r=Statsample::Bivariate.pearson(from,del)
        z_from, z_del = from.vector_standarized, del.vector_standarized
        scores=[]
        z_from.data_with_nils.each_index do |i|
          own, other = z_from[i], z_del[i]
          scores << ((own.nil? or other.nil?) ? nil : own-r*other)
        end
        scores.to_vector(:scale)
      end
      # Correlation between v1 and v2, controlling the effect of
      # +control+ on both. Computed from the three pairwise Pearson
      # correlations over the cases valid on all three vectors.
      def partial_correlation(v1,v2,control)
        first, second, covariate = Statsample.only_valid_clone(v1,v2,control)
        r_12 = pearson(first, second)
        r_1c = pearson(first, covariate)
        r_2c = pearson(second, covariate)
        numerator = r_12-(r_1c*r_2c)
        numerator.quo(Math::sqrt(1-r_1c**2) * Math::sqrt(1-r_2c**2))
      end
      
      # Covariance matrix computed with GSL matrix operations: centers the
      # dataset matrix on its column means and returns the cross-product
      # of the centered matrix divided by (rows - 1).
      def covariance_matrix_optimized(ds)
        data=ds.to_gsl
        rows=data.row_size
        cols=data.column_size
        col_means=((1/rows.to_f)*GSL::Matrix.ones(1,rows)*data).row(0)
        deviations=data-(GSL::Matrix.ones(rows,cols)*GSL::Matrix.diag(col_means))
        cross_products=deviations.transpose*deviations
        ((1/(rows-1).to_f))*cross_products
      end
      
      # Covariance matrix.
      # Order of rows and columns depends on Dataset#fields order.
      # The GSL implementation is used only when the dataset has no missing
      # data, GSL is available and its predicted time is lower than the
      # pairwise one. The result is extended with
      # Statsample::CovariateMatrix and receives the dataset's fields.
      def covariance_matrix(ds)
        n_vars, n_cases = ds.fields.size, ds.cases
        use_gsl = !ds.has_missing_data? && Statsample.has_gsl? &&
                  prediction_optimized(n_vars,n_cases) < prediction_pairwise(n_vars,n_cases)
        cm = use_gsl ? covariance_matrix_optimized(ds) : covariance_matrix_pairwise(ds)
        cm.extend(Statsample::CovariateMatrix)
        cm.fields=ds.fields
        cm
      end
      
      
      # Covariance matrix computed pair by pair through ds.collect_matrix.
      # Cells involving a non-:scale vector are nil, the diagonal holds each
      # vector's variance, and a covariance already computed for the
      # mirrored (col,row) cell is reused.
      def covariance_matrix_pairwise(ds)
        cache={}
        ds.collect_matrix do |row,col|
          if ds[row].type!=:scale or ds[col].type!=:scale
            nil
          elsif row==col
            ds[row].variance
          else
            mirrored=cache[[col,row]]
            mirrored.nil? ? (cache[[row,col]]=covariance(ds[row],ds[col])) : mirrored
          end
        end
      end
      
      # Correlation matrix.
      # Order of rows and columns depends on Dataset#fields order.
      # The GSL implementation is used only when the dataset has no missing
      # data, GSL is available and its predicted time is lower than the
      # pairwise one. The result is extended with
      # Statsample::CovariateMatrix and receives the dataset's fields.
      def correlation_matrix(ds)
        n_vars, n_cases = ds.fields.size, ds.cases
        use_gsl = !ds.has_missing_data? && Statsample.has_gsl? &&
                  prediction_optimized(n_vars,n_cases) < prediction_pairwise(n_vars,n_cases)
        cm = use_gsl ? correlation_matrix_optimized(ds) : correlation_matrix_pairwise(ds)
        cm.extend(Statsample::CovariateMatrix)
        cm.fields=ds.fields
        cm
      end

      # Correlation matrix derived from the GSL covariance matrix: scales it
      # on both sides by a diagonal matrix of inverse standard deviations
      # and then forces the diagonal to exactly 1.0.
      def correlation_matrix_optimized(ds)
        cov=covariance_matrix_optimized(ds)
        inverse_sds=GSL::Matrix.diagonal(cov.diagonal.sqrt.pow(-1))
        cor=inverse_sds*cov*inverse_sds
        # Fix diagonal
        cov.row_size.times do |i|
          cor[i,i]=1.0
        end
        cor
      end
      # Correlation matrix computed pair by pair through ds.collect_matrix.
      # The diagonal is always 1.0, other cells involving a non-:scale
      # vector are nil, and a correlation already computed for the mirrored
      # (col,row) cell is reused.
      def correlation_matrix_pairwise(ds)
        cache={}
        ds.collect_matrix do |row,col|
          if row==col
            1.0
          elsif ds[row].type!=:scale or ds[col].type!=:scale
            nil
          else
            mirrored=cache[[col,row]]
            mirrored.nil? ? (cache[[row,col]]=pearson(ds[row],ds[col])) : mirrored
          end
        end
      end
      
      # Matrix with the number of valid cases for each pair of vectors.
      # The diagonal holds each vector's own count of valid data.
      def n_valid_matrix(ds)
        ds.collect_matrix do |row,col|
          next ds[row].valid_data.size if row==col
          paired, _other = Statsample.only_valid_clone(ds[row],ds[col])
          paired.size
        end
      end
      
      # Matrix of correlation probabilities.
      # Order of rows and columns depends on Dataset#fields order.
      # The diagonal and every cell involving a non-:scale vector are nil;
      # other cells hold prop_pearson for the pair's t value, the number of
      # pairwise valid cases and the requested tails.
      def correlation_probability_matrix(ds, tails=:both)
        rows=ds.fields.collect do |row|
          ds.fields.collect do |col|
            valid_row, _valid_col = Statsample.only_valid_clone(ds[row],ds[col])
            if row==col or ds[row].type!=:scale or ds[col].type!=:scale
              nil
            else
              prop_pearson(t_pearson(ds[row],ds[col]), valid_row.size, tails)
            end
          end
        end
        Matrix.rows(rows)
      end
      
      # Spearman ranked correlation coefficient (rho) between 2 vectors:
      # the Pearson correlation of the :scale ranks of the valid cases.
      def spearman(v1,v2)
        valid1, valid2 = Statsample.only_valid_clone(v1,v2)
        ranks1 = valid1.ranked(:scale)
        ranks2 = valid2.ranked(:scale)
        pearson(ranks1, ranks2)
      end
      # Calculate Point biserial correlation. Equal to Pearson correlation, with
      # one dichotomous value replaced by "0" and the other by "1".
      # Raises TypeError when the first vector does not have exactly two
      # factors or the second one is not of :scale type.
      def point_biserial(dichotomous,continous)
        ds={'d'=>dichotomous,'c'=>continous}.to_dataset.dup_only_valid
        raise(TypeError, "First vector should be dichotomous") if ds['d'].factors.size!=2
        raise(TypeError, "Second vector should be continous") if ds['c'].type!=:scale
        # The lowest factor marks group 0; everything else is group 1
        base=ds['d'].factors.sort[0]
        group0=ds.filter_field('c') {|row| row['d']==base}
        group1=ds.filter_field('c') {|row| row['d']!=base}
        mean_gap=(group1.mean-group0.mean).to_f / ds['c'].sdp
        mean_gap * Math::sqrt(group0.size*group1.size.to_f / ds.cases**2)
      end
      # Kendall Rank Correlation Coefficient (Tau a)
      # Based on Hervé Abdi article.
      # Only the cases valid on both vectors are ranked and compared.
      def tau_a(v1,v2)
        v1a,v2a=Statsample.only_valid_clone(v1,v2)
        # n must count the cases that are actually ranked below, not the
        # raw size of v1 (which may include missing data)
        n=v1a.size
        v1r,v2r=v1a.ranked(:scale),v2a.ranked(:scale)
        o1=ordered_pairs(v1r)
        o2=ordered_pairs(v2r)
        # Twice the number of ordered pairs that differ between both rankings
        delta= o1.size*2-(o2  & o1).size*2
        1-(delta * 2 / (n*(n-1)).to_f)
      end
      # Calculates Goodman and Kruskal’s Tau b correlation.
      # Tb is an asymmetric P-R-E measure of association for nominal scales 
      # (Mielke, X)
      # 
      # Tau-b defines perfect association as strict monotonicity. Although it
      # requires strict monotonicity to reach 1.0, it does not penalize ties as
      # much as some other measures.
      #
      # Computed from the counts returned by #pairs as
      # (P-Q) / sqrt((P+Q+Y)*(P+Q+X)).
      # == Reference
      # Mielke, P. GOODMAN–KRUSKAL TAU AND GAMMA. 
      # Source: http://faculty.chass.ncsu.edu/garson/PA765/assocordinal.htm
      def tau_b(matrix)
        counts=pairs(matrix)
        concordant, discordant = counts['P'], counts['Q']
        with_y_ties = concordant+discordant+counts['Y']
        with_x_ties = concordant+discordant+counts['X']
        (concordant-discordant).to_f / Math::sqrt(with_y_ties*with_x_ties).to_f
      end
      # Calculates Goodman and Kruskal's gamma.
      #
      # Gamma is the surplus of concordant pairs over discordant pairs, as a
      # percentage of all pairs ignoring ties: (P-Q) / (P+Q), with P and Q
      # taken from #pairs.
      #
      # Source: http://faculty.chass.ncsu.edu/garson/PA765/assocordinal.htm
      def gamma(matrix)
        counts=pairs(matrix)
        concordant, discordant = counts['P'], counts['Q']
        (concordant-discordant).to_f / (concordant+discordant).to_f
      end
      # Counts the pair types of a contingency matrix whose rows and columns
      # are already ordered. Returns a hash with
      # * 'P': concordant pairs (other cell is lower and to the right)
      # * 'Q': discordant pairs (other cell is lower and to the left)
      # * 'X': pairs tied on the column (same column, lower row)
      # * 'Y': pairs tied on the row (same row, column further right)
      def pairs(matrix)
        rs=matrix.row_size
        cs=matrix.column_size
        conc=disc=ties_x=ties_y=0
        # Single traversal: every cell is paired with the cells below it
        # and with the cells to its right on the same row.
        (0...rs).each do |x|
          (0...cs).each do |y|
            cell=matrix[x,y]
            ((x+1)...rs).each do |x2|
              ((y+1)...cs).each {|y2| conc+=cell*matrix[x2,y2] }
              (0...y).each {|y2| disc+=cell*matrix[x2,y2] }
              ties_x+=cell*matrix[x2,y]
            end
            ((y+1)...cs).each {|y2| ties_y+=cell*matrix[x,y2] }
          end
        end
        {'P'=>conc,'Q'=>disc,'Y'=>ties_y,'X'=>ties_x}
      end
      # Returns every pair [d[i], d[j]] with i < j taken from vector.data,
      # ordered by i and then by j.
      def ordered_pairs(vector)
        d=vector.data
        last=d.size-1
        pairs=[]
        0.upto(last-1) do |i|
          (i+1).upto(last) {|j| pairs << [d[i],d[j]] }
        end
        pairs
      end
=begin      
      def sum_of_codeviated(v1,v2)
        v1a,v2a=Statsample.only_valid(v1,v2)
        sum=0
        (0...v1a.size).each{|i|
          sum+=v1a[i]*v2a[i]
        }
        sum-((v1a.sum*v2a.sum) / v1a.size.to_f)
      end
=end
      # Reports the minimum number of valid cases of a covariate matrix
      # based on a dataset: the smallest cell of n_valid_matrix, capped by
      # ds.cases.
      def min_n_valid(ds)
        lowest=ds.cases
        counts=n_valid_matrix(ds)
        counts.row_size.times do |x|
          counts.column_size.times do |y|
            lowest=counts[x,y] if counts[x,y] < lowest
          end
        end
        lowest
      end
      
      
    end
  end
end


================================================
FILE: lib/statsample/codification.rb
================================================
require 'yaml'

module Statsample
  # This module aids to code open questions
  # * Select one or more vectors of a dataset, to create a yaml files, on which each vector is a hash, which keys and values are the vector's factors . If data have Statsample::SPLIT_TOKEN on a value, each value will be separated on two or more hash keys.
  # * Edit the yaml and replace the values of hashes with your codes. If you need to create two or mores codes for an answer, use the separator (default Statsample::SPLIT_TOKEN)
  # * Recode the vectors, loading the yaml file:
  #   * recode_dataset_simple!() : The new vectors have the same name of the original plus "_recoded"
  #   * recode_dataset_split!() : Create equal number of vectors as values. See Vector.add_vectors_by_split() for arguments
  #
  # Usage:
  #   recode_file="recodification.yaml"
  #   phase=:first # flag
  #   if phase==:first
  #     File.open(recode_file,"w") {|fp|
  #       Statsample::Codification.create_yaml(ds,%w{vector1 vector2}, fp, ",")
  #     }
  #   # Edit the file recodification.yaml and verify changes
  #   elsif phase==:second
  #     File.open(recode_file,"r") {|fp|
  #       Statsample::Codification.verify(YAML.load(fp),['vector1'])
  #     }
  #   # Add new vectors to the dataset
  #   elsif phase==:third
  #     File.open(recode_file,"r") {|fp|
  #       Statsample::Codification.recode_dataset_split!(ds,YAML.load(fp),"*")
  #     }
  #   end
  #
  module Codification
    class << self
      # Create a hash, based on vectors, to create the dictionary.
      # The keys will be vectors name on dataset and the values
      # will be hashes, with keys = values, for recodification.
      # Values are split with +sep+ and converted to strings; missing (nil)
      # values are skipped.
      # Raises ArgumentError when +vectors+ is empty and Exception when a
      # name is not among dataset.fields.
      def create_hash(dataset, vectors, sep=Statsample::SPLIT_TOKEN)
        raise ArgumentError,"Array should't be empty" if vectors.size==0
        vectors.inject({}) do |pro_hash,v_name|
          raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.fields.include? v_name
          # Drop nils before to_s, otherwise they would become an "" factor
          split_data=dataset[v_name].splitted(sep).flatten.compact.collect {|c| c.to_s}
          pro_hash[v_name]=split_data.uniq.sort.inject({}) {|ac,val| ac[val]=val;ac }
          pro_hash
        end
      end
      # Create a yaml to create a dictionary, based on vectors
      # The keys will be vectors name on dataset and the values
      # will be hashes, with keys = values, for recodification.
      # The hash built by create_hash is dumped with YAML.dump, passing
      # +io+ along as its second argument.
      #
      #   v1=%w{a,b b,c d}.to_vector
      #   ds={"v1"=>v1}.to_dataset
      #   Statsample::Codification.create_yaml(ds,['v1'])
      #   => "--- \nv1: \n  a: a\n  b: b\n  c: c\n  d: d\n"
      def create_yaml(dataset, vectors, io=nil, sep=Statsample::SPLIT_TOKEN)
        YAML.dump(create_hash(dataset, vectors, sep), io)
      end
      # Create a excel to create a dictionary, based on vectors.
      # Raises an error if filename exists
      # The rows will be:
      # * field: name of vector
      # * original: original name
      # * recoded: new code
      # Fields and their values are written in sorted order, one row per
      # value, below a header row.
      def create_excel(dataset, vectors, filename, sep=Statsample::SPLIT_TOKEN)
        require 'spreadsheet'
        raise "Exists a file named #{filename}. Delete ir before overwrite." if File.exist?(filename)
        book = Spreadsheet::Workbook.new
        sheet = book.create_worksheet
        sheet.row(0).concat(%w{field original recoded})
        row_number = 0
        create_hash(dataset, vectors, sep).sort.each do |field, codes|
          codes.sort.each do |original, recoded|
            row_number += 1
            sheet.row(row_number).concat([field.dup, original.dup, recoded.dup])
          end
        end
        book.write(filename)
      end
      # From a excel generates a dictionary hash
      # to use on recode_dataset_simple!() or recode_dataset_split!().
      # Reads the first worksheet, skips the first row and every row with a
      # nil in any of its first three cells, and maps
      # field => {original => recoded}.
      def excel_to_recoded_hash(filename)
        require 'spreadsheet'
        recoded = {}
        sheet = Spreadsheet.open(filename).worksheet(0)
        header = true
        sheet.each do |row|
          if header
            header = false
            next
          end
          field, original, code = row[0], row[1], row[2]
          next if field.nil? or original.nil? or code.nil?
          (recoded[field] ||= {})[original] = code
        end
        recoded
      end

      # Inverts a recodification hash: every code found by splitting a
      # value with +sep+ becomes a key whose value is the list of original
      # keys that carry that code.
      def inverse_hash(h, sep=Statsample::SPLIT_TOKEN)
        inverted = {}
        h.each do |original, codes|
          codes.split(sep).each do |code|
            (inverted[code] ||= []) << original
          end
        end
        inverted
      end

      # Maps every key of +h+ to the array obtained by splitting its value
      # with +sep+.
      def dictionary(h, sep=Statsample::SPLIT_TOKEN)
        dict = {}
        h.each { |original, codes| dict[original] = codes.split(sep) }
        dict
      end

      # Recodes the values of vector +v+ with the dictionary built from
      # +h+. Returns an array with one entry per case: nil when the split
      # value is nil, otherwise the flattened, de-duplicated list of codes
      # found for each token of the case.
      def recode_vector(v,h,sep=Statsample::SPLIT_TOKEN)
        dict=dictionary(h,sep)
        v.splitted(sep).collect do |tokens|
          tokens.nil? ? nil : tokens.collect {|token| dict[token] }.flatten.uniq
        end
      end
      # Recodes the dataset without splitting: delegates to _recode_dataset
      # with the split flag set to false.
      def recode_dataset_simple!(dataset, dictionary_hash ,sep=Statsample::SPLIT_TOKEN)
        split = false
        _recode_dataset(dataset, dictionary_hash, sep, split)
      end
      # Recodes the dataset splitting the codes: delegates to
      # _recode_dataset with the split flag set to true.
      def recode_dataset_split!(dataset, dictionary_hash, sep=Statsample::SPLIT_TOKEN)
        split = true
        _recode_dataset(dataset, dictionary_hash, sep, split)
      end

      # Recodes every vector named by a key of +h+ and stores the result in
      # +dataset+. Each case's codes are joined with +sep+ (nil stays nil).
      # With +split+ the recoded vector is split by separator and stored as
      # "<name>_<key>" vectors; otherwise it is stored as "<name>_recoded".
      # Raises Exception when a name is not among dataset.fields.
      def _recode_dataset(dataset, h , sep=Statsample::SPLIT_TOKEN, split=false)
        h.keys.each do |v_name|
          raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.fields.include? v_name
          joined=recode_vector(dataset[v_name], h[v_name],sep).collect {|codes| codes.nil? ? nil : codes.join(sep) }
          recoded=joined.to_vector
          if split
            recoded.split_by_separator(sep).each do |suffix,vector|
              dataset[v_name+"_"+suffix]=vector
            end
          else
            dataset[v_name+"_recoded"]=recoded
          end
        end
      end


      # Prints to +io+, for every field in +v_names+ (all keys of +h+ when
      # nil), each code of the inverted dictionary followed by the original
      # values that map to it, codes with more original values first.
      def verify(h, v_names=nil,sep=Statsample::SPLIT_TOKEN,io=$>)
        require 'pp'
        v_names||=h.keys
        v_names.each do |v_name|
          by_code=inverse_hash(h[v_name],sep)
          io.puts "- Field: #{v_name}"
          ranked=by_code.sort{|a,b| -(a[1].count<=>b[1].count)}
          ranked.each do |code,originals|
            io.puts "  - \"#{code}\" (#{originals.count}) :\n    -'"+originals.join("\n    -'")+"'"
          end
        end
      end
    end
  end
end


================================================
FILE: lib/statsample/converter/csv.rb
================================================
module Statsample
  class CSV < SpreadsheetBase
    if RUBY_VERSION<"1.9"
      require 'fastercsv'
      CSV_klass=::FasterCSV  
    else
      require 'csv'
      CSV_klass=::CSV  
    end    
    class << self

      # Returns a Dataset based on a csv file, read column-wise.
      # The first line is always treated as the header (headers are forced
      # on and converted to symbols), so every vector is keyed by its
      # header. +ignore_lines+ data rows are dropped from the top of each
      # column. The +csv_opts+ hash given by the caller is not modified.
      def read19(filename,ignore_lines=0,csv_opts=Hash.new)
        # Merge into a new hash so the caller's options are left untouched
        csv_opts = csv_opts.merge(:headers=>true, :header_converters => :symbol)
        csv = CSV_klass::Table.new(CSV_klass::read(filename,'r',csv_opts))
        csv_headers = csv.headers
        #we invert row -> column. It means csv[0] is the first column and not row. Similar to R
        csv.by_col!
        thash = {}
        csv_headers.each_with_index do |header,idx|
          thash[header] = Statsample::Vector.new(csv[idx].drop(ignore_lines))
        end
        Statsample::Dataset.new(thash)
      end
      # Returns a Dataset  based on a csv file
      #
      # The first +ignore_lines+ lines are skipped; the next line provides
      # the field names (through extract_fields) and every following line
      # is added as a case after process_row. The reader is always closed,
      # even when a row fails to process.
      #
      # USE:
      #     ds=Statsample::CSV.read("test_csv.csv")
      def read(filename, empty=[''],ignore_lines=0,csv_opts=Hash.new)        
        first_row=true
        fields=[]
        ds=nil
        line_number=0
        csv=CSV_klass.open(filename,'rb', csv_opts)
        begin
          csv.each do |row|
            line_number+=1
            next if line_number<=ignore_lines
            row.collect!{|c| c.to_s }
            if first_row
              fields=extract_fields(row)
              ds=Statsample::Dataset.new(fields)
              first_row=false
            else
              rowa=process_row(row,empty)
              ds.add_case(rowa,false)
            end
          end
        ensure
          # Release the file handle whatever happened above
          csv.close
        end
        convert_to_scale_and_date(ds,fields)
        ds.update_valid_data
        ds
      end
      # Save a Dataset on a csv file
      #
      # Writes the dataset's fields as header and then every case.
      # With +convert_comma+ every value is converted to a string with "."
      # replaced by ",". Extra +opts+ are passed to the csv writer. The
      # writer is closed even when writing a row fails.
      #
      # USE:
      #     Statsample::CSV.write(ds,"test_csv.csv")
      def write(dataset,filename, convert_comma=false,*opts)
        writer=CSV_klass.open(filename,'w',*opts)
        begin
          writer << dataset.fields
          dataset.each_array do |row|
            row=row.collect {|v| v.to_s.gsub(".",",") } if convert_comma
            writer << row
          end
        ensure
          writer.close
        end
      end
    end
  end
end


================================================
FILE: lib/statsample/converter/spss.rb
================================================
module Statsample
  module SPSS
    class << self
      # Export a SPSS Matrix with tetrachoric correlations .
      #
      # Use: 
      #   ds=Statsample::Excel.read("my_data.xls")
      #   puts Statsample::SPSS.tetrachoric_correlation_matrix(ds)
      def tetrachoric_correlation_matrix(ds)
        dsv=ds.dup_only_valid
        # Drop the vectors without variation and dichotomize the others.
        # Iterate over a copy of the field list: delete_vector removes entries
        # from dsv.fields, and deleting from an array while walking it makes
        # the iteration skip the element that follows each deletion.
        dsv.fields.dup.each{|f|
          if dsv[f].factors.size==1
            dsv.delete_vector(f)
          else
            dsv[f]=dsv[f].dichotomize
          end
        }
        tcm=Statsample::Bivariate.tetrachoric_correlation_matrix(dsv)
        # One entry per remaining field, already formatted for the output.
        n=dsv.fields.collect {|f|
          sprintf("%d",dsv[f].size)
        }
        meanlist=dsv.fields.collect{|f|
          sprintf("%0.3f", dsv[f].mean)
        }
        stddevlist=dsv.fields.collect{|f|
          sprintf("%0.3f", dsv[f].sd)
        }
        out=<<-HEREDOC
MATRIX DATA VARIABLES=ROWTYPE_ #{dsv.fields.join(",")}.
BEGIN DATA
N #{n.join(" ")}
MEAN	#{meanlist.join(" ")}
STDDEV #{stddevlist.join(" ")}
HEREDOC
        # Lower triangle of the correlation matrix, diagonal included.
        tcm.row_size.times {|i|
          out +="CORR "
          (i+1).times {|j|
            out+=sprintf("%0.3f",tcm[i,j])+" "
          }
          out +="\n"
        }
        out+="END DATA.\nEXECUTE.\n"
      end
    end
  end
end


================================================
FILE: lib/statsample/converters.rb
================================================
require 'statsample/converter/spss'
module Statsample
    # Create and dumps Datasets on a database
  module Database
    class << self
      # Read a database query and returns a Dataset
      #
      # USE:
      #
      #  dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
      #  Statsample::Database.read(dbh, "SELECT * FROM test")
      #
      def read(dbh,query)
        require 'dbi'
        statement = dbh.execute(query)
        numeric_types = ['INTEGER', 'DOUBLE']
        fields = []
        vectors = {}
        # One empty vector per result column; INTEGER and DOUBLE columns are
        # typed :scale, every other column :nominal.
        statement.column_info.each do |column|
          column_name = column['name']
          vector = Statsample::Vector.new([])
          vector.name = column_name
          vector.type = numeric_types.include?(column['type_name']) ? :scale : :nominal
          vectors[column_name] = vector
          fields.push(column_name)
        end
        dataset = Statsample::Dataset.new(vectors, fields)
        # false: validation is run once, after all the rows are loaded.
        statement.fetch { |row| dataset.add_case(row.to_a, false) }
        dataset.update_valid_data
        dataset
      end
      # Insert each case of the Dataset on the selected table
      #
      # USE:
      #        
      #  ds={'id'=>[1,2,3].to_vector, 'name'=>["a","b","c"].to_vector}.to_dataset
      #  dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
      #  Statsample::Database.insert(ds,dbh,"test")
      #
      def insert(ds, dbh, table)
        require 'dbi'
        column_list = ds.fields.join(",")
        # One "?" placeholder per field; the values are bound on execute.
        placeholders = Array.new(ds.fields.size, "?").join(",")
        query = "INSERT INTO #{table} (" + column_list + ") VALUES (" + placeholders + ")"
        statement = dbh.prepare(query)
        ds.each_array { |values| statement.execute(*values) }
        true
      end
      # Creates a SQL CREATE TABLE statement based on a given Dataset
      #
      # USE:
      #        
      #  ds={'id'=>[1,2,3,4,5].to_vector,'name'=>%w{Alex Peter Susan Mary John}.to_vector}.to_dataset
      #  Statsample::Database.create_sql(ds,'names')
      #   ==>"CREATE TABLE names (id INTEGER,\n name VARCHAR (255)) CHARACTER SET=UTF8;"
      # 
      def create_sql(ds,table,charset="UTF8")
        # Each column definition is "<field> <db_type of its vector>".
        column_defs = ds.fields.map { |field| field + " " + ds[field].db_type }
        "CREATE TABLE #{table} (" + column_defs.join(",\n ") + ") CHARACTER SET=#{charset};"
      end
    end
  end
  module Mondrian
    class << self
      # Writes the dataset to +filename+ as tab-separated text: a header line
      # with the field names, then one line per case. nil values are written
      # as "NA" and whitespace runs inside values become "_".
      def write(dataset,filename)
        File.open(filename,"wb") do |file|
          file.puts dataset.fields.join("\t")
          dataset.each_array_with_nils do |values|
            cells = values.map do |value|
              value.nil? ? "NA" : value.to_s.gsub(/\s+/,"_")
            end
            file.puts cells.join("\t")
          end
        end
      end
    end
  end
  class SpreadsheetBase
    class << self
      # Builds field names from a header row: cells are downcased, and nil
      # cells get generated names ("var00001", "var00002", ...). The result is
      # passed through Array#recode_repeated.
      def extract_fields(row)
        unnamed = 0
        names = row.to_a.map do |cell|
          next cell.to_s.downcase unless cell.nil?
          unnamed += 1
          "var%05d" % unnamed
        end
        names.recode_repeated
      end
                                         
      # Converts the cells of a row: values listed in +empty+ become nil,
      # numeric strings become Integer (digits only) or Float (comma accepted
      # as decimal mark), and everything else is kept untouched.
      def process_row(row,empty)
        row.to_a.map do |cell|
          next nil if empty.include?(cell)
          next cell unless cell.is_a?(String) && cell.is_number?
          cell =~ /^\d+$/ ? cell.to_i : cell.gsub(",",".").to_f
        end
      end
      # Retypes each listed vector: :scale when it can be scale, otherwise
      # :date when it can be date; other vectors keep their type.
      def convert_to_scale_and_date(ds,fields)
        fields.each do |name|
          inferred = if ds[name].can_be_scale?
            :scale
          elsif ds[name].can_be_date?
            :date
          end
          ds[name].type = inferred if inferred
        end
      end
    
    end
  end
    class PlainText < SpreadsheetBase
      class << self
        # Reads a whitespace-separated text file into a Dataset whose vectors
        # are named after +fields+. Every line is one case.
        def read(filename, fields)
          ds=Statsample::Dataset.new(fields)
          # The block form closes the file when reading ends or fails; the
          # handle used to be left open.
          File.open(filename,"r") do |fp|
            fp.each_line do |line|
              row=process_row(line.strip.split(/\s+/),[""])
              # A line holding only the 0x1A character is not data; skip it.
              next if row==["\x1A"]
              ds.add_case_array(row)
            end
          end
          convert_to_scale_and_date(ds,fields)
          ds.update_valid_data
          fields.each {|f|
            ds[f].name=f
          }
          ds
        end
      end
    end
  class Excel < SpreadsheetBase 
    class << self
      # Write a Excel spreadsheet based on a dataset
      # * TODO: Format nicely date values
      def write(dataset,filename)
        require 'spreadsheet'
        book = Spreadsheet::Workbook.new
        sheet = book.create_worksheet
        header_format = Spreadsheet::Format.new :color => :blue,
                           :weight => :bold
        # The field names are copied because they may be frozen strings.
        sheet.row(0).concat(dataset.fields.map {|field| field.dup})
        sheet.row(0).default_format = header_format
        # Data starts on the row right below the header.
        row_index = 0
        dataset.each_array do |values|
          row_index += 1
          sheet.row(row_index).concat(values)
        end
        book.write(filename)
      end
      # This should be fixed.
      # If we have a Formula, it should be resolved first.

      def preprocess_row(row, dates)
        position = -1
        # Replaces the cells in place: formulas become their value (nil when
        # the value is an Excel error) and numeric cells on the +dates+
        # columns are read back through row.date.
        row.collect! do |cell|
          position += 1
          if cell.is_a?(Spreadsheet::Formula)
            cell.value.is_a?(Spreadsheet::Excel::Error) ? nil : cell.value
          elsif dates.include?(position) && !cell.nil? && cell.is_a?(Numeric)
            row.date(position)
          else
            cell
          end
        end
      end
      private :process_row, :preprocess_row
      
      # Returns a dataset based on a xls file
      # USE:
      #     ds = Statsample::Excel.read("test.xls")
      #
      # Options (all optional):
      # * <tt>:worksheet_id</tt>: worksheet to read (default 0)
      # * <tt>:ignore_lines</tt>: number of leading rows to skip (default 0)
      # * <tt>:empty</tt>: cell values stored as nil (default [''])
      #
      # An error raised while a row is processed is re-raised with the same
      # class and the line number and row content appended to its message.
      def read(filename, opts=Hash.new)
        require 'spreadsheet'
        raise "options should be Hash" unless opts.is_a? Hash
        opts_default={
          :worksheet_id=>0,
          :ignore_lines=>0,
          :empty=>['']
        }

        opts=opts_default.merge opts

        worksheet_id=opts[:worksheet_id]
        ignore_lines=opts[:ignore_lines]
        empty=opts[:empty]

        first_row=true
        fields=[]
        ds=nil
        line_number=0
        book = Spreadsheet.open filename
        sheet= book.worksheet worksheet_id
        sheet.each do |row|
          begin
            # Indexes of the cells formatted as DD/MM/YYYY on this row.
            dates=[]
            row.formats.each_index{|i|
              if !row.formats[i].nil? and row.formats[i].number_format=="DD/MM/YYYY"
                dates.push(i)
              end
            }
            line_number+=1
            next if(line_number<=ignore_lines)

            preprocess_row(row,dates)
            if first_row
              # The first kept row is the header: it names the vectors.
              fields=extract_fields(row)
              ds=Statsample::Dataset.new(fields)
              first_row=false
            else
              rowa=process_row(row,empty)
              # Pad short rows with nil so every case has one value per field.
              (fields.size - rowa.size).times {
                rowa << nil
              }
              ds.add_case(rowa,false)
            end
          rescue => e
            # Exception#exception(message) returns a copy of the same class,
            # so callers still rescue the original type but now see where
            # the failure happened. This message used to be built and dropped.
            raise e.exception("#{e.to_s}\nError on Line # #{line_number}:#{row.join(",")}")
          end
        end
        convert_to_scale_and_date(ds, fields)
        ds.update_valid_data
        fields.each {|f|
          ds[f].name=f
        }
        ds.name=filename
        ds
      end
    end
  end
  module Mx
    class << self
      # Writes the dataset as an Mx file. With type :raw the cases are
      # written as a rectangular block ("." for non-valid values); with
      # :covariance (default) the covariance matrix is written instead.
      def write(dataset,filename,type=:covariance)
        puts "Writing MX File"
        File.open(filename,"w") do |fp|
          fp.puts "! #{filename}"
          fp.puts "! Output generated by Statsample"
          fp.puts "Data Ninput=#{dataset.fields.size} Nobservations=#{dataset.cases}"
          fp.puts "Labels "+dataset.fields.join(" ")
          if type == :raw
            fp.puts "Rectangular"
            dataset.each do |row|
              cells = dataset.fields.map do |f|
                dataset[f].is_valid?(row[f]) ? row[f] : "."
              end
              fp.puts cells.join("\t")
            end
            fp.puts "End Rectangular"
          elsif type == :covariance
            fp.puts " CMatrix Full"
            cm=Statsample::Bivariate.covariance_matrix(dataset)
            lines = (0...(cm.row_size)).map do |r|
              (0...(cm.column_size)).map do |c|
                cm[r,c].nil? ? "." : sprintf("%0.3f", cm[r,c])
              end.join(" ")
            end
            fp.puts lines.join("\n")
          end
        end
      end
    end
  end
	module GGobi
		class << self
      # Writes the GGobi XML produced by #out for +dataset+ into +filename+.
      def write(dataset,filename,opt={})
        File.open(filename,"w") do |file|
          xml = self.out(dataset,opt)
          file.write(xml)
        end
      end
			# Returns the GGobi XML document for +dataset+ as a String.
			# Recognized options: :dataname, :description and :missing (the
			# text written for nil values).
			def out(dataset,opt={})
				require 'ostruct'
				settings = {:dataname => "Default", :description=>"", :missing=>"NA"}
				settings.merge! opt
				# The carrier collects, while the variables are defined, which
				# fields are categorical and the value -> level code conversions.
				carrier=OpenStruct.new
				carrier.categorials=[]
				carrier.conversions={}
				variables_def=dataset.fields.collect{|k|
					variable_definition(carrier,dataset[k],k)
				}.join("\n")

				# Column position -> field name, for the categorical fields only.
				categorical_columns=carrier.categorials.each_with_object({}) {|name,acc|
					acc[dataset.fields.index(name)]=name
				}
				records=""
				dataset.each_array {|values|
					# Categorical values are replaced by their level code.
					categorical_columns.each{|position,name|
						values[position]=carrier.conversions[name][values[position]]
					}
					records << "<record>#{values_definition(values, settings[:missing])}</record>\n"
				}

				<<EOC
<?xml version="1.0"?>
<!DOCTYPE ggobidata SYSTEM "ggobi.dtd">
<ggobidata count="1">
<data name="#{settings[:dataname]}">
<description>#{settings[:description]}</description>
<variables count="#{dataset.fields.size}">
#{variables_def}
</variables>
    <records count="#{dataset.cases}" missingValue="#{settings[:missing]}">
#{records}
</records>

</data>
</ggobidata>
EOC
			end
      # Renders the values of one record separated by spaces: nil is written
      # as +missing+, numbers as they are, and any other value with its
      # whitespace runs replaced by "_".
      def values_definition(c,missing)
        rendered = c.map do |value|
          case value
          when nil     then "#{missing}"
          when Numeric then "#{value}"
          else              "#{value.gsub(/\s+/,"_")}"
          end
        end
        rendered.join(" ")
      end
			# Outputs a string for a variable definition
			# v = vector
			# name = name of the variable
			# nickname = nickname
			def variable_definition(carrier,v,name,nickname=nil)
				nick_attr = nickname.nil? ? "" : "nickname=\"#{nickname}\""
				# A vector is categorical when it is nominal or holds any String.
				if v.type==:nominal || v.data.find {|d| d.is_a? String }
					carrier.categorials.push(name)
					codes = carrier.conversions[name] = {}
					levels = v.factors
					# Level codes start at 1, following the order of the factors.
					level_lines = levels.each_with_index.map do |level, idx|
						codes[level] = idx + 1
						"<level value=\"#{idx + 1}\">#{v.labeling(level)}</level>"
					end
					xml = "<categoricalvariable name=\"#{name}\" #{nick_attr}>\n"
					xml << "<levels count=\"#{levels.size}\">\n"
					xml << level_lines.join("\n")
					xml << "</levels>\n</categoricalvariable>\n"
					xml
				elsif v.data.find {|d| d.is_a? Float }
					"<realvariable name=\"#{name}\" #{nick_attr} />"
				else
					"<integervariable name=\"#{name}\" #{nick_attr} />"
				end
			end

		end
	end
end

require 'statsample/converter/csv.rb'


================================================
FILE: lib/statsample/crosstab.rb
================================================
module Statsample
	# Class to create a crosstab of data.
	# With this, you can create reports and do a chi square test.
	# The first vector will be on the rows and the second on the columns.
	#
  class Crosstab
    include Summarizable
    attr_reader :v_rows, :v_cols
    attr_accessor :row_label, :column_label, :name, :percentage_row, :percentage_column, :percentage_total
    # Builds a crosstab with +v1+ on the rows and +v2+ on the columns.
    # Each key of +opts+ that names a readable attribute is assigned through
    # its writer.
    def initialize(v1, v2, opts=Hash.new)
      #raise ArgumentError, "Both arguments should be Vectors" unless v1.is_a? Statsample::Vector and v2.is_a? Statsample::Vector
      unless v1.size==v2.size
        raise ArgumentError, "Vectors should be the same size"
      end
      valid_pair = Statsample.only_valid_clone(v1.to_vector,v2.to_vector)
      @v_rows, @v_cols = valid_pair
      @cases = @v_rows.size
      @row_label = v1.name
      @column_label = v2.name
      @name = nil
      @percentage_row = false
      @percentage_column = false
      @percentage_total = false
      opts.each do |key, value|
        self.send("#{key}=",value) if self.respond_to? key
      end
      # Default name, used only when no :name option was given.
      @name ||= _("Crosstab %s - %s") % [@row_label, @column_label]
    end
    # Sorted factors of the rows vector.
    def rows_names
      row_factors = @v_rows.factors
      row_factors.sort
    end
    # Sorted factors of the columns vector.
    def cols_names
      column_factors = @v_cols.factors
      column_factors.sort
    end
    # Frequencies of the rows vector.
    def rows_total
      row_vector = @v_rows
      row_vector.frequencies
    end
    # Frequencies of the columns vector.
    def cols_total
      column_vector = @v_cols
      column_vector.frequencies
    end
    
    # Hash with one [row, column] key per cell of the table and its
    # frequency as value; combinations never observed are kept with 0.
    def frequencies
      cells = {}
      rows_names.each do |row|
        cols_names.each { |col| cells[[row, col]] = 0 }
      end
      observed = Statsample::vector_cols_matrix(@v_rows,@v_cols).to_a.to_vector.frequencies
      cells.update(observed)
    end
    # Matrix of frequencies, rows and columns following rows_names and
    # cols_names.
    def to_matrix
      counts = frequencies
      column_keys = cols_names
      table = rows_names.map do |row|
        column_keys.map { |col| counts[[row, col]] }
      end
      Matrix.rows(table)
    end
    # Nested hash: row => { column => frequency }.
    def frequencies_by_row
      counts = frequencies
      rows_names.each_with_object({}) do |row, by_row|
        by_row[row] = cols_names.each_with_object({}) do |col, cells|
          cells[col] = counts[[row, col]]
        end
      end
    end
    # Nested hash: column => { row => frequency }.
    def frequencies_by_col
      counts = frequencies
      cols_names.each_with_object({}) do |col, by_col|
        by_col[col] = rows_names.each_with_object({}) do |row, cells|
          cells[row] = counts[[row, col]]
        end
      end
    end
    # Chi square, based on expected and real matrix
    def chi_square
      require 'statsample/test'
      observed = self.to_matrix
      expected = matrix_expected
      Statsample::Test.chi_square(observed, expected)
    end
    # Useful to obtain chi square
    def matrix_expected
      row_totals = rows_total
      col_totals = cols_total
      n = @v_rows.size
      column_keys = cols_names
      # Each expected cell is (row total * column total) / number of cases.
      expected = rows_names.map do |row|
        column_keys.map { |col| (row_totals[row] * col_totals[col]).quo(n) }
      end
      Matrix.rows(expected)
    end
    # Hash with every column name as key and 0 as value.
    def cols_empty_hash
      cols_names.each_with_object({}) { |col, zeros| zeros[col] = 0 }
    end
    # Adds a section to +builder+ with the chi square text, the row and
    # column labels, the table of raw frequencies with totals, and the
    # percentage tables that are enabled.
    def report_building(builder)
      builder.section(:name=>@name) do |generator|
        counts = frequencies
        row_keys = rows_names
        col_keys = cols_names
        grand_total = 0
        column_totals = cols_empty_hash
        generator.text "Chi Square: #{chi_square}"
        generator.text(_("Rows: %s") % @row_label) unless @row_label.nil?
        generator.text(_("Columns: %s") % @column_label) unless @column_label.nil?

        header = [""] + cols_names.collect { |c| @v_cols.labeling(c) } + [_("Total")]
        table = ReportBuilder::Table.new(:name=>@name+" - "+_("Raw"), :header=>header)
        row_keys.each do |row|
          row_total = 0
          line = [@v_rows.labeling(row)]
          col_keys.each do |col|
            count = counts[[row,col]]
            row_total += count
            grand_total += count
            column_totals[col] += count
            line.push(count)
          end
          line.push(row_total)
          table.row(line)
        end
        table.hr
        # Footer: the total of each column and the grand total.
        footer = [_("Total")]
        col_keys.each { |col| footer.push(column_totals[col]) }
        footer.push(grand_total)
        table.row(footer)
        generator.parse_element(table)

        table_percentage(generator,:row) if @percentage_row
        table_percentage(generator,:column) if @percentage_column
        table_percentage(generator,:total) if @percentage_total
      end
    end
      
    
    # Adds to +generator+ a table of percentages. +type+ selects the base
    # of each cell: :row (row total), :column (column total) or :total
    # (number of cases).
    def table_percentage(generator,type)
      counts = frequencies
      col_keys = cols_names
      row_keys = rows_names
      row_totals = rows_total
      col_totals = cols_total

      type_name = case type
        when :row     then  _("% Row")
        when :column  then  _("% Column")
        when :total   then  _("% Total")
      end

      header = [""] + cols_names.collect { |c| @v_cols.labeling(c) } + [_("Total")]
      table = ReportBuilder::Table.new(:name=>@name+" - "+_(type_name), :header=>header)
      row_keys.each do |row|
        line = [@v_rows.labeling(row)]
        col_keys.each do |col|
          cell_base = case type
            when :row     then  row_totals[row]
            when :column  then  col_totals[col]
            when :total   then  @cases
          end
          line.push(sprintf("%0.2f%%", counts[[row,col]]*100.0/ cell_base ))
        end
        # Last cell of the line: the row total against its own base.
        margin_base = case type
          when :row     then  row_totals[row]
          when :column  then  @cases
          when :total   then  @cases
        end
        line.push(sprintf("%0.2f%%", row_totals[row]*100.0/margin_base))
        table.row(line)
      end

      table.hr
      footer = [_("Total")]
      col_keys.each do |col|
        footer_base = case type
          when :row     then  @cases
          when :column  then  col_totals[col]
          when :total   then  @cases
        end
        footer.push(sprintf("%0.2f%%", col_totals[col]*100.0/footer_base))
      end
      footer.push("100%")
      table.row(footer)
      generator.parse_element(table)
    end
  end
end


================================================
FILE: lib/statsample/dataset.rb
================================================
require 'statsample/vector'

class Hash
  # Builds a Statsample::Dataset from this hash. Extra arguments are
  # forwarded to Statsample::Dataset.new after the hash itself.
  def to_dataset(*extra)
    constructor_args = [self, *extra]
    Statsample::Dataset.new(*constructor_args)
  end
end

class Array
  # New array with +s+ prepended to the string form of each element.
  def prefix(s) # :nodoc:
    map { |element| s + element.to_s }
  end
  # New array with +s+ appended to the string form of each element.
  def suffix(s) # :nodoc:
    map { |element| element.to_s + s }
  end
end

module Statsample
  # Wraps an error raised while iterating a dataset, keeping the dataset
  # (+ds+) and the original exception (+exp+).
  class DatasetException < RuntimeError # :nodoc:
    attr_reader :ds,:exp
    def initialize(ds,e)
      @ds=ds
      @exp=e
    end
    # Message and backtrace of the original error, plus the row being
    # iterated when the dataset pointer is set.
    def to_s
      parts = ["Error on iteration: " + @exp.message, @exp.backtrace.join("\n")]
      row_index = @ds.i
      parts << "Row ##{row_index}:#{@ds.case_as_hash(row_index)}" unless row_index.nil?
      parts.join("\n")
    end
  end
  # Set of cases with values for one or more variables, 
  # analog to a dataframe on R or a standard data file of SPSS.
  # Every vector has <tt>#field</tt> name, which represent it. By default,
  # the vectors are ordered by it field name, but you can change it 
  # the fields order manually.
  # The Dataset work as a Hash, with keys are field names
  # and values are Statsample::Vector  
  # 
  # 
  # ==Usage
  # Create a empty dataset:
  #   Dataset.new()
  # Create a dataset with three empty vectors, called <tt>v1</tt>, <tt>v2</tt> and <tt>v3</tt>:
  #   Dataset.new(%w{v1 v2 v3})
  # Create a dataset with two vectors, called <tt>v1</tt>
  # and <tt>v2</tt>:
  #   Dataset.new({'v1'=>%w{1 2 3}.to_vector, 'v2'=>%w{4 5 6}.to_vector})
  # Create a dataset with two given vectors (v1 and v2), 
  # with vectors on inverted order:
  #   Dataset.new({'v2'=>v2,'v1'=>v1},['v2','v1'])
  #
  # The fast way to create a dataset uses Hash#to_dataset, with
  # field order  as arguments
  #   v1 = [1,2,3].to_scale
  #   v2 = [1,2,3].to_scale
  #   ds = {'v1'=>v2, 'v2'=>v2}.to_dataset(%w{v2 v1})  
  
  class Dataset
    include Writable
    include Summarizable
    # Hash of Statsample::Vector
    attr_reader :vectors
    # Ordered ids of vectors
    attr_reader :fields
    # Name of dataset
    attr_accessor :name
    # Number of cases
    attr_reader :cases
    # Location of pointer on enumerations methods (like #each)
    attr_reader :i

    # Generates a new dataset, using three vectors
    # - Rows
    # - Columns
    # - Values
    #
    # For example, you have these values
    #
    #   x   y   v
    #   a   a   0
    #   a   b   1
    #   b   a   1
    #   b   b   0
    #
    # You obtain
    #   id  a   b
    #    a  0   1
    #    b  1   0
    #
    # Useful to process outputs from databases
    def self.crosstab_by_asignation(rows,columns,values)
      if rows.size!=columns.size || rows.size!=values.size
        raise "Three vectors should be equal size"
      end
      column_keys = columns.factors
      row_keys = rows.factors
      # row => { column => value }, every cell starting as nil.
      cells = row_keys.each_with_object({}) do |row, table|
        table[row] = column_keys.each_with_object({}) { |col, line| line[col] = nil }
      end
      values.each_index do |i|
        cells[rows[i]][columns[i]] = values[i]
      end
      ds = Dataset.new(["_id"]+column_keys)
      column_keys.each { |col| ds[col].type = values.type }
      # One case per row factor: the factor itself, then its cells in
      # column order.
      rows.factors.each do |row|
        ds.add_case_array([row] + column_keys.map { |col| cells[row][col] })
      end
      ds.update_valid_data
      ds
    end
    # Return true if any vector has missing data
    def has_missing_data?
      @vectors.each_value.any? { |vector| vector.has_missing_data? }
    end
    # Return a nested hash using fields as keys and
    # an array constructed of hashes with other values.
    # If block provided, is used to provide the 
    # values, with parameters +row+ of dataset, 
    # +current+ last hash on hierarchy and
    # +name+ of the key to include
    def nest(*tree_keys,&block)
      keys = tree_keys[0].is_a?(Array) ? tree_keys[0] : tree_keys
      tree = Hash.new
      each do |row|
        # Walk (creating when needed) one hash level per key but the last.
        node = keys[0,keys.size-1].inject(tree) do |level, key|
          level[row[key]] ||= Hash.new
        end
        leaf = row[keys.last]
        if block
          node[leaf] = block.call(row, node, leaf)
        else
          # Without a block the leaf collects the rows, stripped of the
          # fields already used as keys.
          node[leaf] ||= Array.new
          node[leaf].push(row.delete_if { |key, _value| keys.include? key })
        end
      end
      tree
    end
    # Creates a new dataset. A dataset is a set of ordered named vectors
    # of the same size.
    #
    # [vectors] With an array, creates a set of empty vectors named as
    # values on the array. With a hash, each Vector is assigned as
    # a variable of the Dataset named as its key
    # [fields]  Array of names for vectors. Is only used for set the
    # order of variables. If empty, vectors keys on alfabethic order as
    # used as fields.
    def initialize(vectors={}, fields=[])
      # Class-wide counter, used only to build the default name.
      @@n_dataset||=0
      @@n_dataset+=1
      @name=_("Dataset %d") % @@n_dataset
      @i=nil
      @gsl=nil
      @cases=0

      if vectors.instance_of? Array
        # An array of names: one empty vector per name, in the given order.
        @fields=vectors.dup
        @vectors=vectors.each_with_object({}) do |field, built|
          built[field]=Statsample::Vector.new()
        end
      else
        # A hash of vectors: stored as given, then checked.
        @vectors=vectors
        @fields=fields
        check_order
        check_length
      end
    end
    # 
    # Creates a copy of the given dataset, deleting all the cases with
    # missing data on one of the vectors.
    # 
    # @param array of fields to include. No value include all fields
    #
    def dup_only_valid(*fields_to_include)
      if fields_to_include.size==1 and fields_to_include[0].is_a? Array
        fields_to_include=fields_to_include[0]
      end
      fields_to_include=@fields if fields_to_include.size==0
      if fields_to_include.any? {|f| @vectors[f].has_missing_data?}
        ds=Dataset.new(fields_to_include)
        fields_to_include.each {|f| ds[f].type=@vectors[f].type}
        each {|row|
          # Keep the row only when it is valid on every included vector.
          unless fields_to_include.any? {|f| @vectors[f].has_missing_data? and !@vectors[f].is_valid? row[f]}
            row_2=fields_to_include.inject({}) {|ac,v| ac[v]=row[v]; ac}
            # false: skip the per-case update_valid_data, which rechecks
            # every vector and made this loop quadratic on the cases.
            ds.add_case(row_2,false)
          end
        }
        # Single validation pass once every case is in place.
        ds.update_valid_data
      else
        # No missing data on the included vectors: a plain copy is enough.
        ds=dup fields_to_include
      end
      ds.name= self.name
      ds
    end
    #
    # Returns a duplicate of the Dataset. 
    # All vectors are copied, so any modification on new
    # dataset doesn't affect original dataset's vectors.
    # If fields given as parameter, only include those vectors.
    #
    # @param array of fields to include. No value include all fields    
    # @return {Statsample::Dataset}
    def dup(*fields_to_include)
      wanted = fields_to_include
      wanted = wanted[0] if wanted.size==1 && wanted[0].is_a?(Array)
      wanted = @fields if wanted.size==0
      copies = {}
      order = []
      wanted.each do |field|
        raise "Vector #{field} doesn't exists" unless @vectors.has_key? field
        # Each vector is duplicated, so the copy does not share vectors.
        copies[field] = @vectors[field].dup
        order << field
      end
      duplicate = Dataset.new(copies,order)
      duplicate.name = self.name
      duplicate
    end
    
    
    # Returns an array with the fields from the first argument to the last argument
    def from_to(from,to)
      # Both ends must exist; +from+ is checked before +to+.
      [from, to].each do |field|
        raise ArgumentError, "Field #{field} should be on dataset" unless @fields.include? field
      end
      first = @fields.index(from)
      last = @fields.index(to)
      @fields.slice(first..last)
    end
    
    # Returns (when possible) a cheap copy of dataset.
    # If no vector have missing values, returns original vectors.
    # If missing values presents, uses Dataset.dup_only_valid.
    #
    # @param array of fields to include. No value include all fields
    # @return {Statsample::Dataset}
    def clone_only_valid(*fields_to_include)
      selected = fields_to_include
      selected = selected[0] if selected.size==1 && selected[0].is_a?(Array)
      selected = @fields.dup if selected.size==0
      needs_filtering = selected.any? { |field| @vectors[field].has_missing_data? }
      # The filtered copy is only needed when some selected vector has
      # missing data; otherwise the clone is enough.
      needs_filtering ? dup_only_valid(selected) : clone(selected)
    end
    # Returns a shallow copy of Dataset.
    # Object id will be distinct, but @vectors will be the same.
    # @param array of fields to include. No value include all fields
    # @return {Statsample::Dataset}    
    def clone(*fields_to_include)
      wanted = fields_to_include
      wanted = wanted[0] if wanted.size==1 && wanted[0].is_a?(Array)
      wanted = @fields.dup if wanted.size==0
      shallow = Dataset.new
      wanted.each do |field|
        raise "Vector #{field} doesn't exists" unless @vectors.has_key? field
        # The very same vector object is assigned: no data is copied.
        shallow[field] = @vectors[field]
      end
      shallow.fields = wanted
      shallow.name = @name
      shallow.update_valid_data
      shallow
    end
    # Creates a copy of the given dataset, without data on vectors
    #
    # @return {Statsample::Dataset}
    def dup_empty
      empty_vectors = @vectors.each_with_object({}) do |(field, vector), copies|
        copies[field] = vector.dup_empty
      end
      Dataset.new(empty_vectors,@fields.dup)
    end
    # Merge vectors from two datasets.
    # In case of name collision, the vector names are changed to
    # x_1, x_2 ....
    #
    # @return {Statsample::Dataset}
    def merge(other_ds)
      unless @cases==other_ds.cases
        raise "Cases should be equal (this:#{@cases}; other:#{other_ds.cases}"
      end
      own_types = @fields.map { |f| @vectors[f].type }
      other_types = other_ds.fields.map { |f| other_ds[f].type }
      types = own_types + other_types
      new_fields = (@fields+other_ds.fields).recode_repeated
      merged = Statsample::Dataset.new(new_fields)
      # types and new_fields are parallel arrays.
      new_fields.each_with_index do |field, position|
        merged[field].type = types[position]
      end
      @cases.times do |i|
        merged.add_case_array(case_as_array(i)+other_ds.case_as_array(i))
      end
      merged.update_valid_data
      merged
    end

    # Join 2 Datasets by given fields
    # type is one of :left and :inner, default is :left
    #
    # @return {Statsample::Dataset}
    # fields_1 (on this dataset) and fields_2 (on +other_ds+) are paired by
    # position: fields_1[0] is matched against fields_2[0], and so on.
    def join(other_ds,fields_1=[],fields_2=[],type=:left)
      fields_new = other_ds.fields - fields_2
      fields = self.fields + fields_new

      # Index the other dataset by its key values. The key follows the order
      # of fields_2 (not the order of the fields inside the row), so it
      # lines up with the key built from fields_1 below.
      other_ds_hash = {}
      other_ds.each do |row|
        key = fields_2.collect{|f| row[f]}
        value = row.select{|k,v| fields_new.include?(k)}
        (other_ds_hash[key] ||= []) << value
      end

      new_ds = Dataset.new(fields)

      self.each do |row|
        key = fields_1.collect{|f| row[f]}

        new_case = row.dup
        matches = other_ds_hash[key]

        if matches.nil?
          # No match: only a left join keeps the case, with nil on the
          # fields coming from the other dataset.
          if type == :left
            fields_new.each{|field| new_case[field] = nil}
            new_ds.add_case(new_case)
          end
        else
          # One output case per matching row of the other dataset.
          matches.each do |new_values|
            new_ds.add_case new_case.merge(new_values)
          end
        end

      end
      new_ds
    end
    # Returns a dataset with standarized data.
    #
    # @return {Statsample::Dataset}
    def standarize
      standardized = dup()
      # Every vector of the copy is replaced by its standardized version.
      standardized.fields.each do |field|
        standardized[field] = standardized[field].vector_standarized
      end
      standardized
    end
    # Generate a matrix, based on fields of dataset
    #
    # @return {::Matrix}
    
    def collect_matrix
      # The block receives every (row field, column field) pair.
      cells = @fields.map do |row_field|
        @fields.map do |col_field|
          yield row_field, col_field
        end
      end
      Matrix.rows(cells)
    end
    
    # We have the same datasets if +vectors+ and +fields+ are the same
    #
    # @return {Boolean}
    def ==(d2)
      @vectors==d2.vectors and @fields==d2.fields
    end
    # Returns vector <tt>c</tt>
    # 
    # @return {Statsample::Vector}
    def col(c)
      vectors_by_field = @vectors
      vectors_by_field[c]
    end
    alias_method :vector, :col
    # Equal to Dataset[<tt>name</tt>]=<tt>vector</tt>
    #
    # @return self
    def add_vector(name, vector)
      unless vector.size==@cases
        raise ArgumentError, "Vector have different size"
      end
      @vectors[name]=vector
      check_order
      self
    end
    # Returns true if dataset have vector <tt>v</tt>.
    #
    # @return {Boolean}
    def has_vector?(v)
      @vectors.key?(v)
    end
    # Creates a dataset with the random data, of a n size
    # If n not given, uses original number of cases.
    #
    # @return {Statsample::Dataset}
    def bootstrap(n=nil)
      n||=@cases
      ds_boot=dup_empty
      n.times do
        # Draw with replacement among the existing cases. The index must be
        # bounded by @cases and not by the requested size: rand(n) reached
        # past the last case when n > @cases and never sampled the final
        # cases when n < @cases.
        ds_boot.add_case_array(case_as_array(rand(@cases)))
      end
      ds_boot.update_valid_data
      ds_boot
    end
    # Fast version of #add_case.
    # Can only add one case, and no error check is performed.
    # You SHOULD use #update_valid_data at the end of the insertion cycle
    #
    # 
    def add_case_array(v)
      # Values are matched to the fields by position and pushed straight
      # onto the data of each vector.
      v.each_with_index do |value, position|
        @vectors[@fields[position]].data.push(value)
      end
    end
    # Insert a case, using:
    # * Array: size equal to number of vectors and values in the same order as fields
    # * Hash: keys equal to fields
    # If uvd is false, #update_valid_data is not executed after 
    # inserting a case. This is very useful if you want to increase the 
    # performance on inserting many cases,  because #update_valid_data 
    # performs check on vectors and on the dataset
    
    def add_case(v,uvd=true)
      if v.is_a?(Array)
        if v[0].is_a?(Array)
          # Array of arrays: every inner array is one case.
          v.each { |single_case| add_case(single_case,false) }
        else
          unless @fields.size==v.size
            raise ArgumentError, "Input array size (#{v.size}) should be equal to fields number (#{@fields.size})"
          end
          v.each_with_index { |value, position| @vectors[@fields[position]].add(value,false) }
        end
      elsif v.is_a?(Hash)
        unless @fields.sort==v.keys.sort
          raise ArgumentError, "Hash keys should be equal to fields #{(v.keys - @fields).join(",")}"
        end
        @fields.each { |field| @vectors[field].add(v[field],false) }
      else
        raise TypeError, 'Value must be a Array or a Hash'
      end
      update_valid_data if uvd
    end
    # Check vectors and fields after inserting data. Use only 
    # after  #add_case_array or #add_case with second parameter to false
    def update_valid_data
      @gsl=nil
      @vectors.values_at(*@fields).each { |vector| vector.set_valid_data }
      check_length
    end
    # Delete vector named +name+. Multiple fields accepted.
    def delete_vector(*args)
      # Accept both delete_vector('a','b') and delete_vector(['a','b'])
      names=(args.size==1 && args[0].is_a?(Array)) ? args[0] : args
      names.each do |name|
        @fields.delete(name)
        @vectors.delete(name)
      end
    end
    
    # Splits vector +name_+ by +sep+ and adds one vector per resulting key.
    # New fields are called "<name_><join><n>", n counting from 1, and each
    # new vector is named "<name_>:<key>".
    def add_vectors_by_split_recode(name_,join='-',sep=Statsample::SPLIT_TOKEN)
      pieces=@vectors[name_].split_by_separator(sep)
      pieces.each_with_index do |(key,vector),idx|
        vector.name=name_+":"+key
        add_vector(name_+join+(idx+1).to_s,vector)
      end
    end
    # Splits vector +name+ by +sep+ and adds each resulting vector under
    # the field "<name><join><key>".
    def add_vectors_by_split(name,join='-',sep=Statsample::SPLIT_TOKEN)
      pieces=@vectors[name].split_by_separator(sep)
      pieces.each do |key,vector|
        add_vector(name+join+key,vector)
      end
    end
    
    # Builds a vector of +type+ holding the block's result for each case.
    # The block receives the case as yielded by #each.
    def vector_by_calculation(type=:scale)
      results=[]
      each { |row| results << yield(row) }
      results.to_vector(type)
    end
    # Returns a vector with sumatory of fields
    # if fields parameter is empty, sum all fields 
    def vector_sum(fields=nil)
      fields||=@fields
      sums=collect_with_index do |row,i|
        # A case with any missing (nil or false) value gets no sum
        incomplete=fields.any? { |f| !@vectors[f].data_with_nils[i] }
        if incomplete
          nil
        else
          fields.inject(0) { |total,f| total+row[f].to_f }
        end
      end
      sums.name=_("Sum from %s") % @name
      sums
    end
    # Validates a list of field names against #fields and returns it.
    # A nil list means all fields; unknown names raise an error.
    def check_fields(fields)
      fields||=@fields
      unknown=fields-@fields
      unless unknown.empty?
        raise "Fields #{unknown.join(", ")} doesn't exists on dataset"
      end
      fields
    end
    
    # Returns a vector with the numbers of missing values for a case
    def vector_missing_values(fields=nil)
      fields=check_fields(fields)
      collect_with_index do |_row,i|
        # Missing means nil on the vector's data_with_nils for this case
        fields.count { |f| @vectors[f].data_with_nils[i].nil? }
      end
    end
    # Returns, per case, the total number of characters of the string form
    # of the non-missing values on +fields+ (all fields if nil).
    def vector_count_characters(fields=nil)
      fields=check_fields(fields)
      collect_with_index do |row,i|
        present=fields.reject { |f| @vectors[f].data_with_nils[i].nil? }
        present.inject(0) { |chars,f| chars+row[f].to_s.size }
      end
    end
    # Returns a vector with the mean for a set of fields
    # if fields parameter is empty, return the mean for all fields
    # if max invalid parameter > 0, returns the mean for all tuples
    # with 0 to max_invalid invalid fields
    def vector_mean(fields=nil, max_invalid=0)
      fields=check_fields(fields)
      means=[]
      each_with_index do |row,i|
        # Fields whose value for this case is not missing
        valid=fields.reject { |f| @vectors[f].data_with_nils[i].nil? }
        invalids=fields.size-valid.size
        # A case with no valid value has no mean, whatever max_invalid
        # allows; dividing by zero valid fields used to raise.
        if invalids>max_invalid || valid.empty?
          means.push(nil)
        else
          total=valid.inject(0) { |sum,f| sum+row[f].to_f }
          means.push(total.quo(valid.size))
        end
      end
      means=means.to_vector(:scale)
      means.name=_("Means from %s") % @name
      means
    end
    # Check vectors for type and size.
    def check_length # :nodoc:
      expected=nil
      @vectors.each do |key,vector|
        unless vector.is_a? Statsample::Vector
          raise Exception, "Data #{vector.class} is not a vector on key #{key}"
        end
        if expected.nil?
          # The first vector fixes the size all others must match
          expected=vector.size
        elsif vector.size!=expected
          raise Exception, "Vector #{key} have size #{vector.size} and dataset have size #{expected}"
        end
      end
      @cases=expected
    end
    # Retrieves each vector as [key, vector]
    def each_vector # :yield: |key, vector|
      # Iteration follows @fields order, not the order of the vectors hash
      @fields.each do |field|
        yield field, @vectors[field]
      end
    end
    
    # The implementation of #case_as_hash is chosen once, when this file
    # is loaded: if Statsample::STATSAMPLE__ responds to :case_as_hash the
    # call is delegated to it, otherwise the pure-Ruby #_case_as_hash is used.
    if Statsample::STATSAMPLE__.respond_to?(:case_as_hash)
      def case_as_hash(c) # :nodoc:
        Statsample::STATSAMPLE__.case_as_hash(self,c)
      end
    else
      # Retrieves case i as a hash
      def case_as_hash(i)
        _case_as_hash(i)
      end
    end

    # The implementation of #case_as_array is chosen once, when this file
    # is loaded: if Statsample::STATSAMPLE__ responds to :case_as_array the
    # call is delegated to it, otherwise the pure-Ruby #_case_as_array is used.
    if Statsample::STATSAMPLE__.respond_to?(:case_as_array)
      def case_as_array(c) # :nodoc:
        Statsample::STATSAMPLE__.case_as_array(self,c)
      end
    else
      # Retrieves case i as a array, ordered on #fields order
      def case_as_array(i)
        _case_as_array(i)
      end
    end
    def _case_as_hash(c) # :nodoc:
      # Pure-Ruby lookup: field name => value of case c
      row={}
      @fields.each do |field|
        row[field]=@vectors[field][c]
      end
      row
    end
    def _case_as_array(c) # :nodoc:
      # Pure-Ruby lookup: values of case c, in @fields order
      @fields.map do |field|
        @vectors[field][c]
      end
    end
    
    # Returns each case as a hash
    def each
      @i=0
      @cases.times do |idx|
        # @i tracks the case being processed; it is left untouched when
        # an error aborts the iteration
        @i=idx
        yield case_as_hash(idx)
      end
      @i=nil
    rescue => e
      raise DatasetException.new(self, e)
    end
    
    # Returns each case as hash and index
    def each_with_index # :yield: |case, i|
      @i=0
      @cases.times do |idx|
        # @i tracks the case being processed; it is left untouched when
        # an error aborts the iteration
        @i=idx
        yield case_as_hash(idx), idx
      end
      @i=nil
    rescue => e
      raise DatasetException.new(self, e)
    end
    
    # Returns each case as an array, coding missing values as nils
    def each_array_with_nils
      @cases.times do |idx|
        @i=idx
        # Values come from data_with_nils, so missing data shows up as nil
        yield fields.collect { |f| @vectors[f].data_with_nils[idx] }
      end
      @i=nil
    end
    # Returns each case as an array
    def each_array
      @cases.times do |idx|
        # @i exposes the index of the case currently being yielded
        @i=idx
        yield case_as_array(idx)
      end
      @i=nil
    end
    # Set fields order. If you omit one or more vectors, they are
    # ordered by alphabetic order.
    def fields=(f)
      @fields=f
      # Reconcile the new order with the stored vectors: names without a
      # vector are dropped and vectors missing from +f+ are appended sorted.
      check_order
    end
    # Check congruence between +fields+ attribute
    # and keys on +vectors
    def check_order #:nodoc:
      keys=@vectors.keys
      return if keys.sort==@fields.sort
      # Keep the known fields in their current order, then append the
      # vectors that were not listed, alphabetically
      known=@fields&keys
      @fields=known+(keys.sort-known)
    end
    # Returns the vector named i
    def[](i)
      case i
      when Range
        # A range of field names selects a sub-dataset
        clone(*from_to(i.begin,i.end))
      when Array
        clone(i)
      else
        unless @vectors.has_key?(i)
          raise Exception,"Vector '#{i}' doesn't exists on dataset"
        end
        @vectors[i]
      end
    end
    # Retrieves a Statsample::Vector, based on the result
    # of calculation performed on each case.
    def collect(type=:scale)
      values=[]
      each do |row|
        values << yield(row)
      end
      Statsample::Vector.new(values,type)
    end
    # Same as Statsample::Vector.collect, but giving case index as second parameter on yield.
    def collect_with_index(type=:scale)
      values=[]
      each_with_index do |row,idx|
        values << yield(row,idx)
      end
      Statsample::Vector.new(values,type)
    end
    # Recode a vector based on a block
    def recode!(vector_name)
      0.upto(@cases-1) do |idx|
        # The block sees the whole case and returns the new value
        @vectors[vector_name].data[idx]=yield(case_as_hash(idx))
      end
      # Raw data was written directly, so refresh the vector's valid data
      @vectors[vector_name].set_valid_data
    end
    
    # Builds a Statsample::Crosstab from the vectors stored under fields
    # +v1+ and +v2+; +opts+ is passed through unchanged.
    def crosstab(v1,v2,opts={})
      first=@vectors[v1]
      second=@vectors[v2]
      Statsample::Crosstab.new(first,second,opts)
    end
    # Stores vector +v+ under field +i+ and re-synchronizes the field order.
    # Only exact Statsample::Vector instances are accepted.
    def[]=(i,v)
      unless v.instance_of? Statsample::Vector
        raise ArgumentError,"Should pass a Statsample::Vector"
      end
      @vectors[i]=v
      check_order
    end
    # Return data as a matrix. Column are ordered by #fields and 
    # rows by orden of insertion
    def to_matrix
      cases_as_rows=[]
      each_array { |c| cases_as_rows << c }
      Matrix.rows(cases_as_rows)
    end
    
    # GSL helpers are only defined when Statsample.has_gsl? is true at
    # load time.
    if Statsample.has_gsl?
      # Discards the memoized GSL matrix so #to_gsl rebuilds it
      def clear_gsl
        @gsl=nil
      end
      
      # Returns the dataset as a GSL::Matrix of cases x fields.size,
      # building it on first use and memoizing it in @gsl.
      def to_gsl
        if @gsl.nil?
          # cases may not be set yet; update_valid_data computes it
          if cases.nil?
            update_valid_data
          end
          @gsl=GSL::Matrix.alloc(cases,fields.size)
          self.each_array{|c|
            # @i is the index of the current case, set by #each_array
            @gsl.set_row(@i,c)
          }
        end
        @gsl
      end
      
    end
    
    # Return a correlation matrix for fields included as parameters.
    # By default, uses all fields of dataset
	def correlation_matrix(fields=nil)
      # Restrict to the requested fields, or use the whole dataset
      ds=fields ? clone(fields) : self
      Statsample::Bivariate.correlation_matrix(ds)
    end
    # Return a covariance matrix for fields included as parameters.
    # By default, uses all fields of dataset
	def covariance_matrix(fields=nil)
      # Restrict to the requested fields, or use the whole dataset
      ds=fields ? clone(fields) : self
      Statsample::Bivariate.covariance_matrix(ds)
    end
    
    # Create a new dataset with all cases which the block returns true
    def filter
      filtered=dup_empty
      each do |c|
        next unless yield(c)
        # Validation is deferred until every kept case is inserted
        filtered.add_case(c,false)
      end
      filtered.update_valid_data
      filtered.name=_("%s(filtered)") % @name
      filtered
    end
    
    # creates a new vector with the data of a given field which the block returns true
    def filter_field(field)
      kept=[]
      each { |c| kept << c[field] if yield(c) }
      # The new vector keeps the type of the source field
      kept.to_vector(@vectors[field].type)
    end
    
    # Creates a Stastample::Multiset, using one or more fields
    # to split the dataset.
    
   
    def to_multiset_by_split(*fields)
      # Multiset is loaded on demand, only when a split is requested
      require 'statsample/multiset'
      return to_multiset_by_split_one_field(fields[0]) if fields.size==1
      to_multiset_by_split_multiple_fields(*fields)
    end
    # Creates a Statsample::Multiset, using one field
    
    def to_multiset_by_split_one_field(field)
      unless @fields.include? field
        raise ArgumentError,"Should use a correct field name"
      end
      # One empty dataset per distinct value (factor) of the split field
      ms=Multiset.new_empty_vectors(@fields, @vectors[field].factors)
      each do |c|
        ms[c[field]].add_case(c,false)
      end
      ms.datasets.each do |key,ds|
        ds.update_valid_data
        ds.name=@vectors[field].labeling(key)
        # Carry over the metadata of the original vectors
        ds.vectors.each do |vname,vector|
          source=@vectors[vname]
          vector.type=source.type
          vector.name=source.name
          vector.labels=source.labels
        end
      end
      ms
    end
    # Creates a Statsample::Multiset using several fields at once.
    # Each dataset is keyed by an array holding one factor per field, in
    # the order the fields were given, and named after the joined labels.
    def to_multiset_by_split_multiple_fields(*fields)
      # Cartesian product of the factors of every split field
      factors_total=nil
      fields.each do |f|
        factors=@vectors[f].factors
        if factors_total.nil?
          factors_total=factors.collect { |c| [c] }
        else
          combined=[]
          factors_total.each { |f1| factors.each { |f2| combined.push(f1+[f2]) } }
          factors_total=combined
        end
      end
      ms=Multiset.new_empty_vectors(@fields,factors_total)

      # Look the key up directly. The previous version built this block as
      # a source string and eval'ed it, which broke on field names that
      # contain a quote and executed whatever the name held.
      each do |c|
        ms[fields.collect { |f| c[f] }].add_case(c,false)
      end

      ms.datasets.each do |k,ds|
        ds.update_valid_data
        ds.name=fields.each_with_index.map { |f,i|
          @vectors[f].labeling(k[i])
        }.join("-")
        # Carry over the metadata of the original vectors
        ds.vectors.each do |k1,v1|
          v1.type=@vectors[k1].type
          v1.name=@vectors[k1].name
          v1.labels=@vectors[k1].labels
        end
      end
      ms
    end
    # Returns a vector, based on a string with a calculation based
    # on vector
    # The calculation will be eval'ed, so you can put any variable
    # or expression valid on ruby
    # For example:
    #   a=[1,2].to_vector(:scale)
    #   b=[3,4].to_vector(:scale)
    #   ds={'a'=>a,'b'=>b}.to_dataset
    #   ds.compute("a+b")
    #   => Vector [4,6]
    def compute(text)
      # NOTE(review): the expression is eval'ed as Ruby code, so +text+ must
      # never come from an untrusted source.
      #
      # Map every field name to the code that reads it from the current
      # row. Only :scale vectors are coerced with to_f; the comparison used
      # to be an assignment, which silently retyped every vector to :scale.
      replacements={}
      @fields.each do |f|
        accessor="row[#{f.inspect}]"
        accessor+=".to_f" if @vectors[f].type==:scale
        replacements[f]=accessor
      end
      # Substitute in a single pass, longest names first, on a copy of the
      # text: the caller's string is left alone and text that was already
      # substituted is never scanned again.
      names=@fields.sort_by { |f| -f.size }
      expression=text.gsub(Regexp.union(names)) { |name| replacements[name] }
      collect_with_index do |row,i|
        # A case with a missing value on any field yields nil
        invalid=@fields.any? { |f| @vectors[f].data_with_nils[i].nil? }
        invalid ? nil : eval(expression)
      end
    end
    # Test each row with one or more tests
    # each test is a Proc with the form
    #   Proc.new {|row| row['age']>0}
    # The function returns an array with all errors
    def verify(*tests)
      # An optional leading String names the field used to identify rows;
      # otherwise the first field is used. Each remaining test is indexed
      # as [message, fields to display, callable].
      id=tests[0].is_a?(String) ? tests.shift : @fields[0]
      errors=[]
      row_number=0
      each do |row|
        row_number+=1
        tests.each do |test|
          next if test[2].call(row)
          shown=test[1]
          values=""
          if shown.size>0
            values=" ("+shown.collect { |k| "#{k}=#{row[k]}" }.join(", ")+")"
          end
          errors.push("#{row_number} [#{row[id]}]: #{test[0]}#{values}")
        end
      end
      errors
    end
    # Short description with class, object id, name, fields and number of
    # cases (taken from the size of the first field's vector).
    def to_s
      # A dataset without fields has no first vector to ask for its size;
      # report 0 cases instead of raising NoMethodError on nil.
      n_cases=@fields.empty? ? 0 : @vectors[@fields[0]].size
      "#<"+self.class.to_s+":"+self.object_id.to_s+" @name=#{@name} @fields=["+@fields.join(",")+"] cases="+n_cases.to_s
    end
    # Same text as #to_s
    def inspect
      to_s
    end
    # Creates a new dataset for one to many relations
    # on a dataset, based on pattern of field names.
    # 
    # for example, you have a survey for number of children
    # with this structure:
    #   id, name, child_name_1, child_age_1, child_name_2, child_age_2
    # with 
    #   ds.one_to_many(%w{id}, "child_%v_%n"
    # the field of first parameters will be copied verbatim
    # to new dataset, and fields which responds to second 
    # pattern will be added one case for each different %n.
    # For example
    #   cases=[
    #     ['1','george','red',10,'blue',20,nil,nil],
    #     ['2','fred','green',15,'orange',30,'white',20],
    #     ['3','alfred',nil,nil,nil,nil,nil,nil]
    #   ]
    #   ds=Statsample::Dataset.new(%w{id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3})
    #   cases.each {|c| ds.add_case_array c }
    #   ds.one_to_many(['id'],'car_%v%n').to_matrix
    #   => Matrix[
    #      ["red", "1", 10], 
    #      ["blue", "1", 20],
    #      ["green", "2", 15],
    #      ["orange", "2", 30],
    #      ["white", "2", 20]
    #      ]
    # 
    def one_to_many(parent_fields, pattern)
      # %v captures the variable name and %n its number. The expression is
      # anchored on both ends so a multi-digit number is captured whole:
      # unanchored, the lazy digit group stopped at the first digit and
      # "car_color12" was read as n=1.
      # NOTE(review): captures are read positionally, so %v is assumed to
      # come before %n in +pattern+.
      re=Regexp.new('\A'+pattern.gsub("%v","(.+?)").gsub("%n","(\\d+?)")+'\z')
      # Build a new array: the caller's parent_fields must not be modified
      ds_vars=parent_fields+["_col_id"]
      vars=[]
      max_n=0
      h=parent_fields.inject({}) {|a,v| a[v]=Statsample::Vector.new([], @vectors[v].type);a }
      # _col_id stores the number (%n) each new case was taken from
      h['_col_id']=[].to_scale
      @fields.each do |f|
        match=re.match(f)
        next unless match
        var=match[1]
        n=match[2].to_i
        unless vars.include? var
          vars.push(var)
          h[var]=Statsample::Vector.new([], @vectors[f].type)
        end
        max_n=n if max_n<n
      end
      ds=Dataset.new(h,ds_vars+vars)
      each do |row|
        row_out={}
        parent_fields.each do |f|
          row_out[f]=row[f]
        end
        1.upto(max_n) do |n|
          any_data=false
          vars.each do |v|
            data=row[pattern.gsub("%v",v.to_s).gsub("%n",n.to_s)]
            row_out[v]=data
            any_data=true unless data.nil?
          end
          # Only numbers with at least one value produce a case
          if any_data
            row_out["_col_id"]=n
            ds.add_case(row_out,false)
          end
        end
      end
      ds.update_valid_data
      ds
    end
    # Adds a section named after the dataset to builder +b+, with the
    # number of cases followed by each vector in #fields order.
    def report_building(b)
      b.section(:name=>@name) do |g|
        # Translate the template first and interpolate afterwards. Written
        # as _"Cases: %d" % cases, the already formatted text was passed
        # to _ and could never match a translation entry.
        g.text(_("Cases: %d") % cases)
        @fields.each do |f|
          g.text "Element:[#{f}]"
          g.parse_element(@vectors[f])
        end
      end
    end
  end
end


================================================
FILE: lib/statsample/dominanceanalysis/bootstrap.rb
================================================
module Statsample
  class DominanceAnalysis
    # == Goal
    # Generates Bootstrap samples to identify the replicability of a Dominance Analysis. See Azen & Budescu (2003) for more information.
    #
    # == Usage
    # 
    #  require 'statsample'
    #  a=100.times.collect {rand}.to_scale
    #  b=100.times.collect {rand}.to_scale
    #  c=100.times.collect {rand}.to_scale
    #  d=100.times.collect {rand}.to_scale
    #  ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
    #  ds['y']=ds.collect{|row| row['a']*5+row['b']*2+row['c']*2+row['d']*2+10*rand()}
    #  dab=Statsample::DominanceAnalysis::Bootstrap.new(ds, 'y', :debug=>true)
    #  dab.bootstrap(100,nil)
    #  puts dab.summary
    # <strong>Output</strong>
    #   Sample size: 100
    #  t: 1.98421693632958
    #  
    #  Linear Regression Engine: Statsample::Regression::Multiple::MatrixEngine
    #  Table: Bootstrap report
    #  --------------------------------------------------------------------------------------------
    #  | pairs                 | sD  | Dij    | SE(Dij) | Pij   | Pji   | Pno   | Reproducibility |
    #  --------------------------------------------------------------------------------------------
    #  | Complete dominance    |
    #  --------------------------------------------------------------------------------------------
    #  | a - b                 | 1.0 | 0.6150 | 0.454   | 0.550 | 0.320 | 0.130 | 0.550           |
    #  | a - c                 | 1.0 | 0.9550 | 0.175   | 0.930 | 0.020 | 0.050 | 0.930           |
    #  | a - d                 | 1.0 | 0.9750 | 0.131   | 0.960 | 0.010 | 0.030 | 0.960           |
    #  | b - c                 | 1.0 | 0.8800 | 0.276   | 0.820 | 0.060 | 0.120 | 0.820           |
    #  | b - d                 | 1.0 | 0.9250 | 0.193   | 0.860 | 0.010 | 0.130 | 0.860           |
    #  | c - d                 | 0.5 | 0.5950 | 0.346   | 0.350 | 0.160 | 0.490 | 0.490           |
    #  --------------------------------------------------------------------------------------------
    #  | Conditional dominance |
    #  --------------------------------------------------------------------------------------------
    #  | a - b                 | 1.0 | 0.6300 | 0.458   | 0.580 | 0.320 | 0.100 | 0.580           |
    #  | a - c                 | 1.0 | 0.9700 | 0.156   | 0.960 | 0.020 | 0.020 | 0.960           |
    #  | a - d                 | 1.0 | 0.9800 | 0.121   | 0.970 | 0.010 | 0.020 | 0.970           |
    #  | b - c                 | 1.0 | 0.8850 | 0.283   | 0.840 | 0.070 | 0.090 | 0.840           |
    #  | b - d                 | 1.0 | 0.9500 | 0.181   | 0.920 | 0.020 | 0.060 | 0.920           |
    #  | c - d                 | 0.5 | 0.5800 | 0.360   | 0.350 | 0.190 | 0.460 | 0.460           |
    #  --------------------------------------------------------------------------------------------
    #  | General Dominance     |
    #  --------------------------------------------------------------------------------------------
    #  | a - b                 | 1.0 | 0.6500 | 0.479   | 0.650 | 0.350 | 0.000 | 0.650           |
    #  | a - c                 | 1.0 | 0.9800 | 0.141   | 0.980 | 0.020 | 0.000 | 0.980           |
    #  | a - d                 | 1.0 | 0.9900 | 0.100   | 0.990 | 0.010 | 0.000 | 0.990           |
    #  | b - c                 | 1.0 | 0.9000 | 0.302   | 0.900 | 0.100 | 0.000 | 0.900           |
    #  | b - d                 | 1.0 | 0.9700 | 0.171   | 0.970 | 0.030 | 0.000 | 0.970           |
    #  | c - d                 | 1.0 | 0.5600 | 0.499   | 0.560 | 0.440 | 0.000 | 0.560           |
    #  --------------------------------------------------------------------------------------------
    #  
    #  Table: General averages
    #  ---------------------------------------
    #  | var | mean  | se    | p.5   | p.95  |
    #  ---------------------------------------
    #  | a   | 0.133 | 0.049 | 0.062 | 0.218 |
    #  | b   | 0.106 | 0.048 | 0.029 | 0.199 |
    #  | c   | 0.035 | 0.032 | 0.002 | 0.106 |
    #  | d   | 0.023 | 0.019 | 0.002 | 0.062 |
    #  ---------------------------------------
    #
    # == References:
    # * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. <em>Psychological Methods, 8</em>(2), 129-148.
    class Bootstrap
      include Writable
      include Summarizable
      # Total Dominance results
      attr_reader :samples_td
      # Conditional Dominance results
      attr_reader :samples_cd
      # General Dominance results
      attr_reader :samples_gd
      # General average results 
      attr_reader :samples_ga
      # Name of fields
      attr_reader :fields
      # Regression class used for analysis
      attr_accessor :regression_class
      # Dataset
      attr_accessor :ds
      # Name of analysis
      attr_accessor :name
      # Alpha level of confidence. Default: ALPHA
      attr_accessor :alpha
      # Debug?
      attr_accessor :debug
      # Default level of confidence for t calculation
      ALPHA=0.95
      # Create a new Dominance Analysis Bootstrap Object
      # 
      # * ds: A Dataset object
      # * y_var: Name of dependent variable
      # * opts: Any other attribute of the class 
      def initialize(ds,y_var, opts=Hash.new)
        @ds=ds
        @y_var=y_var
        @n=ds.cases

        @n_samples=0
        @alpha=ALPHA
        @debug=false
        # Several dependent variables need the multiple-dependent engine
        if y_var.is_a? Array
          @fields=ds.fields-y_var
          @regression_class=Regression::Multiple::MultipleDependent
        else
          @fields=ds.fields-[y_var]
          @regression_class=Regression::Multiple::MatrixEngine
        end
        @samples_ga=@fields.inject({}){|a,v| a[v]=[];a}

        @name=_("Bootstrap dominance Analysis:  %s over %s") % [ ds.fields.join(",") , @y_var]
        opts.each{|k,v|
          # Check for the writer, not the reader: read-only attributes such
          # as :fields respond to k but have no k=, so sending it raised
          # NoMethodError. Options without a writer are ignored.
          self.send("#{k}=",v) if self.respond_to? "#{k}="
        }
        create_samples_pairs
      end
      # lr_class deprecated
      alias_method :lr_class, :regression_class
      # Dominance analysis of the original dataset, built on first use
      # and memoized in @da.
      def da
        @da||=DominanceAnalysis.new(@ds,@y_var, :regression_class => @regression_class)
      end
      # Creates n re-samples from original dataset and store result of
      # each sample on @samples_td, @samples_cd, @samples_gd, @samples_ga
      # 
      # * number_samples: Number of new samples to add
      # * n: size of each new sample. If nil, equal to original sample size
      
      def bootstrap(number_samples,n=nil)
        number_samples.times do |t|
          @n_samples+=1
          puts _("Bootstrap %d of %d") % [t+1, number_samples] if @debug
          resampled=DominanceAnalysis.new(@ds.bootstrap(n), @y_var, :regression_class => @regression_class)
          # Each result of the re-sampled analysis is appended, key by
          # key, to the matching accumulator
          [
            [:total_dominance, @samples_td],
            [:conditional_dominance, @samples_cd],
            [:general_dominance, @samples_gd],
            [:general_averages, @samples_ga]
          ].each do |reader,store|
            resampled.send(reader).each do |k,v|
              store[k].push(v)
            end
          end
        end
      end
      # Resets the total, conditional and general dominance accumulators
      # and fills @pairs with every pair of predictor fields, each pair
      # starting with an empty result list in the three accumulators.
      def create_samples_pairs
        @samples_td={}
        @samples_cd={}
        @samples_gd={}
        @pairs=[]
        # A leftover debugging `p data` used to print every pair to
        # stdout each time an object was created; it has been removed.
        @fields.combination(2).each do |pair|
          @pairs.push(pair)
          [@samples_td, @samples_cd, @samples_gd].each{|s|
            s[pair]=[]
          }
        end
      end
      # Value of Distribution::T.p_value for the two-sided level @alpha,
      # with @n_samples - 1 degrees of freedom.
      def t
        tail=(1-@alpha) / 2
        Distribution::T.p_value(1-tail, @n_samples - 1)
      end
      def report_building(builder) # :nodoc:
        raise "You should bootstrap first" if @n_samples==0
        builder.section(:name=>@name) do |generator|
          generator.text(_("Sample size: %d\n") % @n_samples)
          generator.text "t: #{t}\n"
          generator.text(_("Linear Regression Engine: %s") % @regression_class.name)

          table=ReportBuilder::Table.new(:name=>"Bootstrap report", :header => [_("pairs"), "sD","Dij", _("SE(Dij)"), "Pij", "Pji", "Pno", _("Reproducibility")])
          # One group of rows per kind of dominance: title, bootstrap
          # accumulator and the pairwise method on the original analysis
          groups=[
            [_("Complete dominance"), @samples_td, :total_dominance_pairwise],
            [_("Conditional dominance"), @samples_cd, :conditional_dominance_pairwise],
            [_("General Dominance"), @samples_gd, :general_dominance_pairwise]
          ]
          groups.each_with_index do |(title,samples,pairwise),idx|
            # Groups are separated by a rule; the first one has none above
            table.hr if idx>0
            table.row([title,"","","","","","",""])
            table.hr
            @pairs.each do |pair|
              std=samples[pair].to_vector(:scale)
              ttd=da.send(pairwise,pair[0],pair[1])
              table.row(summary_pairs(pair,std,ttd))
            end
          end
          generator.parse_element(table)

          averages=ReportBuilder::Table.new(:name=>_("General averages"), :header=>[_("var"), _("mean"), _("se"), _("p.5"), _("p.95")])
          @fields.each do |f|
            v=@samples_ga[f].to_vector(:scale)
            stats=[v.mean, v.sd, v.percentil(5), v.percentil(95)]
            averages.row([@ds[f].name]+stats.collect { |x| sprintf("%0.3f",x) })
          end
          generator.parse_element(averages)
        end
      end
      # Builds one report row for +pair+: label, +ttd+, mean and sd of
      # +std+, the proportions of 1, 0 and 0.5, and the proportion of +ttd+.
      def summary_pairs(pair,std,ttd)
        freqs=std.proportions
        # Outcomes that never occurred count as proportion 0
        [0, 0.5, 1].each do |outcome|
          freqs[outcome]=0 if freqs[outcome].nil?
        end
        label="%s - %s" % [@ds[pair[0]].name, @ds[pair[1]].name]
        row=[label, f(ttd,1), f(std.mean,4), f(std.sd)]
        row+[f(freqs[1]), f(freqs[0]), f(freqs[0.5]), f(freqs[ttd])]
      end
      # Formats +v+ as a fixed-point string with +n+ decimals
      def f(v,n=3)
        sprintf("%0.#{n}f",v)
      end
    end
  end
end


================================================
FILE: lib/statsample/dominanceanalysis.rb
================================================
module Statsample
  # Dominance Analysis is a procedure based on an examination of the R<sup>2</sup> values
  # for all possible subset models, to identify the relevance of one or more 
  # predictors in the prediction of the criterion.
  #
  # See Budescu(1993), Azen & Budescu (2003, 2006) for more information.
  #
  # == Use
  #
  #  a=1000.times.collect {rand}.to_scale
  #  b=1000.times.collect {rand}.to_scale
  #  c=1000.times.collect {rand}.to_scale
  #  ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
  #  ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+rand()}
  #  da=Statsample::DominanceAnalysis.new(ds,'y')
  #  puts da.summary
  # 
  # === Output:
  #
  #  Report: Report 2010-02-08 19:10:11 -0300
  #  Table: Dominance Analysis result
  #  ------------------------------------------------------------
  #  |                  | r2    | sign  | a     | b     | c     |
  #  ------------------------------------------------------------
  #  | Model 0          |       |       | 0.648 | 0.265 | 0.109 |
  #  ------------------------------------------------------------
  #  | a                | 0.648 | 0.000 | --    | 0.229 | 0.104 |
  #  | b                | 0.265 | 0.000 | 0.612 | --    | 0.104 |
  #  | c                | 0.109 | 0.000 | 0.643 | 0.260 | --    |
  #  ------------------------------------------------------------
  #  | k=1 Average      |       |       | 0.627 | 0.244 | 0.104 |
  #  ------------------------------------------------------------
  #  | a*b              | 0.877 | 0.000 | --    | --    | 0.099 |
  #  | a*c              | 0.752 | 0.000 | --    | 0.224 | --    |
  #  | b*c              | 0.369 | 0.000 | 0.607 | --    | --    |
  #  ------------------------------------------------------------
  #  | k=2 Average      |       |       | 0.607 | 0.224 | 0.099 |
  #  ------------------------------------------------------------
  #  | a*b*c            | 0.976 | 0.000 | --    | --    | --    |
  #  ------------------------------------------------------------
  #  | Overall averages |       |       | 0.628 | 0.245 | 0.104 |
  #  ------------------------------------------------------------
  #  
  #  Table: Pairwise dominance
  #  -----------------------------------------
  #  | Pairs | Total | Conditional | General |
  #  -----------------------------------------
  #  | a - b | 1.0   | 1.0         | 1.0     |
  #  | a - c | 1.0   | 1.0         | 1.0     |
  #  | b - c | 1.0   | 1.0         | 1.0     |
  #  -----------------------------------------
  #
  # == Reference:
  # * Budescu, D. V. (1993). Dominance analysis: a new approach to the problem of relative importance of predictors in multiple regression. <em>Psychological Bulletin, 114</em>, 542-551.
  # * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. <em>Psychological Methods, 8</em>(2), 129-148.
  # * Azen, R. & Budescu, D.V. (2006). Comparing predictors in Multivariate Regression Models: An extension of Dominance Analysis. <em>Journal of Educational and Behavioral Statistics, 31</em>(2), 157-180.
  #
  class DominanceAnalysis
    include Summarizable
    # Class to generate the regressions. Default to Statsample::Regression::Multiple::MatrixEngine
    attr_accessor :regression_class
    # Name of analysis
    attr_accessor :name
    # Set to true if you want to build from dataset, not correlation matrix
    attr_accessor :build_from_dataset
    #  Array with independent variables. You could create subarrays, 
    #  to test groups of predictors as blocks
    attr_accessor  :predictors
    # If you provide a matrix as input, you should set 
    # the number of cases to define significance of R^2
    attr_accessor  :cases
    # Method of :regression_class used to measure association. 
    # 
    # Only necessary to change if you have multivariate dependent.
    # * :r2yx (R^2_yx), the default option, is the  option when distinction
    #   between independent and dependents variable is arbitrary
    # * :p2yx is the option when the distinction between independent and dependents variables is real.
    #   
    
    attr_accessor  :method_association
    
    
    attr_reader :dependent
    
    UNIVARIATE_REGRESSION_CLASS=Statsample::Regression::Multiple::MatrixEngine
    MULTIVARIATE_REGRESSION_CLASS=Statsample::Regression::Multiple::MultipleDependent
    
    # Printable label of a predictor: a block of predictors (an Array) is
    # rendered as "(a,b,...)"; anything else is returned unchanged.
    def self.predictor_name(variable)
      return variable unless variable.is_a? Array
      "(#{variable.join(',')})"
    end
    # Creates a new DominanceAnalysis object.
    #
    # Parameters:
    # * input:     A Matrix or Dataset object. Anything else raises
    #              ArgumentError.
    # * dependent: Name of the dependent variable. Could be an array, if you
    #              want to do a Multivariate Regression Analysis.
    # * opts:      Hash of options. Every key the object responds to is
    #              assigned through its writer (e.g. :predictors, :name,
    #              :cases, :build_from_dataset, :regression_class).
    #
    # If :predictors is not given, it is set to all fields on input, except
    # the dependent variable(s).
    def initialize(input, dependent, opts=Hash.new)
      # Validate the input first, so a wrong argument is reported as such
      # instead of failing later on input.fields.
      unless input.is_a?(Statsample::Dataset) or input.is_a?(::Matrix)
        raise ArgumentError.new("You should use a Matrix or a Dataset")
      end

      @build_from_dataset=false
      # An array of dependents selects the multivariate regression engine.
      if dependent.is_a? Array
        @regression_class= MULTIVARIATE_REGRESSION_CLASS
        @method_association=:r2yx
      else
        @regression_class= UNIVARIATE_REGRESSION_CLASS
        @method_association=:r2
      end

      @name=nil
      opts.each{|k,v|
        self.send("#{k}=",v) if self.respond_to? k
      }
      # Internally the dependent is always an array.
      @dependent=dependent
      @dependent=[@dependent] unless @dependent.is_a? Array

      @predictors ||= input.fields-@dependent

      @name=_("Dominance Analysis:  %s over %s") % [ @predictors.flatten.join(",") , @dependent.join(",")] if @name.nil?

      if input.is_a? Statsample::Dataset
        @ds=input
        @matrix=Statsample::Bivariate.correlation_matrix(input)
        # For a dataset the number of cases comes from the data, replacing
        # any value set through opts.
        @cases=Statsample::Bivariate.min_n_valid(input)
      else
        @ds=nil
        @matrix=input
      end
      @models=nil
      @models_data=nil
      @general_averages=nil
    end
    # Compute models: builds every subset of predictors (create_models) and
    # then stores the additional contribution of each predictor on the
    # subsets that do not include it (fill_models).
    def compute
      create_models
      fill_models
    end
    # Array with every subset of predictors. Computed on first access.
    def models
      compute if @models.nil?
      @models
    end
    
    # Hash with the data of every model. Computed on first access.
    def models_data
      compute if @models_data.nil?
      @models_data
    end
    # Builds @models (every non-empty subset of @predictors, ordered by
    # size) and @models_data (one ModelData per subset, keyed by the subset
    # sorted by the string form of its members).
    def create_models
      @models = []
      @models_data = {}
      1.upto(@predictors.size) do |size|
        @predictors.combination(size) do |independent|
          @models << independent
          # Blocks of predictors are flattened to select the variables.
          variables = independent.flatten + @dependent
          source = @build_from_dataset ? @ds.dup(variables) : @matrix.submatrix(variables)
          model = ModelData.new(independent, source, self)
          key = independent.sort { |x, y| x.to_s <=> y.to_s }
          models_data[key] = model
        end
      end
    end
    # For every model and every predictor not included on it, registers on
    # the model the r2 of the model extended with that predictor
    # (ModelData#add_contribution turns it into a difference).
    def fill_models
      @models.each do |model|
        missing = @predictors.reject { |predictor| model.include?(predictor) }
        missing.each do |predictor|
          extended = md(model + [predictor])
          md(model).add_contribution(predictor, extended.r2)
        end
      end
    end
    private :create_models, :fill_models
    
    # Compares the r2 of the two single-predictor models.
    # Returns 1 if the model with +i+ has the larger r2, 0 if the model with
    # +j+ has the larger one and 0.5 otherwise.
    def dominance_for_nil_model(i,j)
      r2_i = md([i]).r2
      r2_j = md([j]).r2
      return 1 if r2_i > r2_j
      return 0 if r2_i < r2_j
      0.5
    end
    # Returns 1 if i D j (i totally dominates j), 0 if j D i and 0.5 if
    # undetermined.
    def total_dominance_pairwise(i,j)
      baseline = dominance_for_nil_model(i,j)
      return 0.5 if baseline == 0.5
      verdicts = [baseline]
      models_data.each_value do |model|
        contribution_i = model.contributions[i]
        contribution_j = model.contributions[j]
        # Skip the models where either contribution is missing.
        next if contribution_i.nil? or contribution_j.nil?
        if contribution_i > contribution_j
          verdicts << 1
        elsif contribution_i < contribution_j
          verdicts << 0
        else
          # A single tie makes the dominance undetermined.
          return 0.5
        end
      end
      # Dominance holds only when every comparison agrees.
      verdicts.uniq.size > 1 ? 0.5 : verdicts[0]
    end
    
    # Returns 1 if i cD j (i conditionally dominates j), 0 if j cD i and
    # 0.5 if undetermined.
    def conditional_dominance_pairwise(i,j)
      baseline = dominance_for_nil_model(i,j)
      return 0.5 if baseline == 0.5
      verdicts = [baseline]
      # Compare the average contributions for every model size below the
      # full model.
      1.upto(@predictors.size - 1) do |k|
        averages = average_k(k)
        if averages[i] > averages[j]
          verdicts << 1
        elsif averages[i] < averages[j]
          verdicts << 0
        else
          return 0.5
        end
      end
      verdicts.uniq.size > 1 ? 0.5 : verdicts[0]
    end
    # Returns 1 if i gD j (i generally dominates j), 0 if j gD i and 0.5 if
    # undetermined. Based on the general averages of both predictors.
    def general_dominance_pairwise(i,j)
      averages = general_averages
      return 1 if averages[i] > averages[j]
      return 0 if averages[i] < averages[j]
      0.5
    end
    # Every model made of exactly two predictors.
    def pairs
      models.select { |model| model.size == 2 }
    end
    # Hash: pair of predictors => total dominance of the first over the second.
    def total_dominance
      pairs.each_with_object({}) do |pair, result|
        result[pair] = total_dominance_pairwise(pair[0], pair[1])
      end
    end
    # Hash: pair of predictors => conditional dominance of the first over
    # the second.
    def conditional_dominance
      pairs.each_with_object({}) do |pair, result|
        result[pair] = conditional_dominance_pairwise(pair[0], pair[1])
      end
    end
    # Hash: pair of predictors => general dominance of the first over the
    # second.
    def general_dominance
      pairs.each_with_object({}) do |pair, result|
        result[pair] = general_dominance_pairwise(pair[0], pair[1])
      end
    end
    
    # Data of the model made of predictors +m+. The order of +m+ is
    # irrelevant: the lookup key is +m+ sorted by the string form of its
    # members.
    def md(m)
      key = m.sort { |x, y| x.to_s <=> y.to_s }
      models_data[key]
    end
    # Data of every model with exactly +k+ predictors, on the order
    # of @models.
    def md_k(k)
      @models.select { |model| model.size == k }.map { |model| md(model) }
    end
    
    # For a hash with arrays of numbers as values, returns a hash with the
    # same keys and, as value, the mean of the corresponding array
    # (calculated through a :scale vector).
    def get_averages(averages)
      averages.each_with_object({}) do |(key, values), means|
        means[key] = values.to_vector(:scale).mean
      end
    end
    # Hash with the average contribution of each predictor over the models
    # of size +k+. Returns nil when +k+ equals the number of predictors.
    def average_k(k)
      return nil if k == @predictors.size
      same_size = md_k(k)
      collected = @predictors.each_with_object({}) do |predictor, acc|
        # Contributions left as nil are not part of the average.
        acc[predictor] = same_size.map { |model| model.contributions[predictor] }.compact
      end
      get_averages(collected)
    end
    # Hash with the general average of each predictor: the mean of the r2
    # of its single-predictor model and of its average contribution for
    # every model size below the full model. Memoized.
    def general_averages
      return @general_averages unless @general_averages.nil?
      series = {}
      @predictors.each { |predictor| series[predictor] = [md([predictor]).r2] }
      1.upto(@predictors.size - 1) do |k|
        by_size = average_k(k)
        @predictors.each { |predictor| series[predictor] << by_size[predictor] }
      end
      @general_averages = get_averages(series)
    end
    

    # Adds to builder +g+ a section with two tables: the r2 and additional
    # contributions of every model (with averages by model size and overall
    # averages), and the pairwise total, conditional and general dominance.
    def report_building(g)
      compute if @models.nil?
      g.section(:name => @name) do |generator|
        header = ["", "r2", _("sign")]
        header += @predictors.map { |predictor| DominanceAnalysis.predictor_name(predictor) }
        # One formatted cell per predictor, from a predictor => value hash.
        cells = lambda { |values| @predictors.map { |f| sprintf("%0.3f", values[f]) } }

        generator.table(:name => _("Dominance Analysis result"), :header => header) do |t|
          model0 = [_("Model 0"), "", ""]
          t.row(model0 + @predictors.map { |f| sprintf("%0.3f", md([f]).r2) })
          t.hr
          1.upto(@predictors.size) do |k|
            md_k(k).each { |model| t.row(model.add_table_row) }
            # Report averages (there is none for the full model)
            averages = average_k(k)
            next if averages.nil?
            t.hr
            t.row([_("k=%d Average") % k, "", ""] + cells.call(averages))
            t.hr
          end

          overall = general_averages
          t.hr
          t.row([_("Overall averages"), "", ""] + cells.call(overall))
        end

        td = total_dominance
        cd = conditional_dominance
        gd = general_dominance
        pairwise_header = [_("Pairs"), _("Total"), _("Conditional"), _("General")]
        generator.table(:name => _("Pairwise dominance"), :header => pairwise_header) do |t|
          pairs.each do |pair|
            label = pair.map { |v| v.is_a?(Array) ? "(" + v.join("-") + ")" : v }.join(" - ")
            t.row([label] + [td, cd, gd].map { |dominance| sprintf("%0.1f", dominance[pair]) })
          end
        end
      end
    end
    # Holds the regression for one subset of predictors and the additional
    # contribution (increase on r2) of every predictor not included on it.
    class ModelData # :nodoc:
      # Hash: predictor => additional contribution. The predictors of the
      # model itself are present with a nil value.
      attr_reader :contributions
      # * independent: predictors (or blocks of predictors) of this model.
      # * data:        input handed to the regression class.
      # * da:          analysis object; provides predictors, dependent,
      #                cases, method_association and regression_class.
      def initialize(independent, data, da)
        @independent=independent
        @data=data
        @predictors=da.predictors
        @dependent=da.dependent
        @cases=da.cases
        @method=da.method_association
        @contributions=@independent.inject({}){|a,v| a[v]=nil;a}

        r_class=da.regression_class

        # A single dependent is handed by name, several as an array.
        if @dependent.size==1
          @lr=r_class.new(data, @dependent[0], :cases=>@cases)
        else
          @lr=r_class.new(data, @dependent, :cases=>@cases)
        end
      end
      # Stores the contribution of predictor +f+, where +v+ is the
      # association of the model extended with +f+.
      def add_contribution(f, v)
        @contributions[f]=v-r2
      end
      # Association measure of the model, obtained calling the method
      # named by method_association on the regression object.
      def r2
        @lr.send(@method)
      end
      # Predictor names joined by "*".
      def name
        @independent.collect {|variable|
          DominanceAnalysis.predictor_name(variable)
        }.join("*")
      end
      # Row for the report table: name, r2, significance ("???" when the
      # number of cases is unknown) and one cell per predictor
      # ("--" when there is no contribution).
      def add_table_row
        if @cases
          sign=sprintf("%0.3f", @lr.probability)
        else
          sign="???"
        end

        [name, sprintf("%0.3f",r2), sign] + @predictors.collect{|k|
          v=@contributions[k]
          if v.nil?
            "--"
          else
            sprintf("%0.3f",v)
          end
        }
      end
      # Two-line text description of the model.
      def summary
        # Exactly one argument per directive: surplus arguments make
        # sprintf raise ArgumentError when $DEBUG is set.
        out=sprintf("%s: r2=%0.3f(p=%0.2f)\n",name, r2, @lr.significance)
        out << @predictors.collect{|k|
          v=@contributions[k]
          if v.nil?
            "--"
          else
            sprintf("%s=%0.3f",k,v)
          end
        }.join(" | ")
        out << "\n"
        return out
      end
    end # end ModelData
  end # end Dominance Analysis
end

require 'statsample/dominanceanalysis/bootstrap'


================================================
FILE: lib/statsample/factor/map.rb
================================================
module Statsample
  module Factor
  # = Velicer's Minimum Average Partial
  # 
  # "Velicer’s (1976) MAP test involves a complete princi-
  # pal components analysis followed by the examination of
  # a series of matrices of partial correlations. Specifically,
  # on the first step, the first principal component is par-
  # tialed out of the correlations between the variables of in-
  # terest, and the average squared coefficient in the off-
  # diagonals of the resulting partial correlation matrix is
  # computed. On the second step, the first two principal
  # components are partialed out of the original correlation
  # matrix and the average squared partial correlation is
  # again computed. These computations are conducted for k
  # (the number of variables) minus one steps. The average
  # squared partial correlations from these steps are then
  # lined up, and the number of components is determined by
  # the step number in the analyses that resulted in the lowest
  # average squared partial correlation. The average squared
  # coefficient in the original correlation matrix is also com-
  # puted, and if this coefficient happens to be lower than
  # the lowest average squared partial correlation, then no
  # components should be extracted from the correlation ma-
  # trix. Statistically, components are retained as long as the
  # variance in the correlation matrix represents systematic
  # variance. Components are no longer retained when there
  # is proportionately more unsystematic variance than sys-
  # tematic variance." (O'Connor, 2000, p.397).
  # 
  # Current algorithm is loosely based on SPSS O'Connor algorithm
  # 
  # == Reference
  # * O'Connor, B. (2000). SPSS and SAS programs for determining the number of components using parallel analysis and Velicer's MAP test. Behavior Research Methods, Instruments, & Computers, 32(3), 396-402.
  #


    class MAP
      include Summarizable
      include DirtyMemoize
      # Name of analysis
      attr_accessor :name
      attr_reader :eigenvalues
      # Number of factors to retain
      attr_reader :number_of_factors
      # Average squared correlations
      attr_reader :fm
      # Smallest average squared correlation
      attr_reader :minfm
      
      attr_accessor :use_gsl
      # Creates the analysis from a dataset, using its correlation matrix
      # (ds.correlation_matrix). +opts+ is handed to +new+ unchanged.
      def self.with_dataset(ds,opts=Hash.new)
        new(ds.correlation_matrix,opts)
      end
      # Stores +matrix+ and applies the options.
      # Known options: :use_gsl (true by default) and :name.
      def initialize(matrix, opts=Hash.new)
        @matrix = matrix
        defaults = {
          :use_gsl => true,
          :name => _("Velicer's MAP")
        }
        @opts = defaults.merge(opts)
        # Only the known options are sent to their writers.
        defaults.each_key { |option| send("#{option}=", @opts[option]) }
      end
      # Calculates @eigenvalues, @fm (one value per number of partialed
      # components, starting with 0), @minfm (smallest value of @fm) and
      # @number_of_factors (index of that smallest value).
      def compute
        # Work with the GSL version of the matrix only if requested and available.
        gsl_m=(use_gsl and Statsample.has_gsl?) ? @matrix.to_gsl : @matrix
        klass_m=gsl_m.class
        eigvect,@eigenvalues=gsl_m.eigenvectors_matrix, gsl_m.eigenvalues
        eigenvalues_sqrt=@eigenvalues.collect {|v| Math.sqrt(v)}
        # Loadings: eigenvectors scaled by the square root of their eigenvalue.
        loadings=eigvect*(klass_m.diagonal(*eigenvalues_sqrt))
        fm=Array.new(@matrix.row_size)
        ncol=@matrix.column_size
        
        # Step 0 uses the original matrix.
        # NOTE(review): presumably mssq is the sum of squared elements, so
        # subtracting ncol drops a unit diagonal -- confirm.
        fm[0]=(gsl_m.mssq - ncol).quo(ncol*(ncol-1))
        
        (ncol-1).times do |m|
          puts "MAP:Eigenvalue #{m+1}" if $DEBUG
          # Loadings of the first m+1 components, removed from the matrix.
          a=loadings[0..(loadings.row_size-1),0..m]
          partcov= gsl_m - (a*a.transpose)
          
          # Rescale the partial covariances with 1/sqrt of their diagonal.
          d=klass_m.diagonal(*(partcov.diagonal.collect {|v| Math::sqrt(1/v)}))
          pr=d*partcov*d
          fm[m+1]=(pr.mssq-ncol).quo(ncol*(ncol-1))
        end
        minfm=fm[0]
        nfactors=0
        @errors=[]
        # Search the smallest value. Complex values are not compared; their
        # index is kept on @errors.
        fm.each_with_index do |v,s|
          if defined?(Complex) and v.is_a? ::Complex
            @errors.push(s)
          else
            if v < minfm
              minfm=v
              nfactors=s
            end
          end
        end
        @number_of_factors=nfactors
        @fm=fm
        @minfm=minfm
        
      end
      # Adds to builder +g+ a section with the eigenvalues, the average
      # squared correlation for each number of components, the smallest of
      # them and the number of components to retain. Entries whose index is
      # on @errors are printed as "*".
      def report_building(g) #:nodoc:
        g.section(:name=>@name) do |s|
          s.table(:name=>_("Eigenvalues"),:header=>[_("Value")]) do |t|
            eigenvalues.each_with_index do |e,i|
                t.row([@errors.include?(i) ? "*" : "%0.6f" % e])
            end
          end
          s.table(:name=>_("Velicer's Average Squared Correlations"), :header=>[_("number of components"),_("average square correlation")]) do |t|
            fm.each_with_index do |v,i|
              t.row(["%d" % i, @errors.include?(i) ? "*" : "%0.6f" % v])
            end
          end
          # Translate the template first and interpolate afterwards: a
          # message already filled with numbers can't match any msgid.
          s.text(_("The smallest average squared correlation is : %0.6f") % minfm)
          s.text(_("The number of components is : %d") % number_of_factors)
        end
      end
      dirty_memoize :number_of_factors, :fm, :minfm, :eigenvalues

    end
  end
end


================================================
FILE: lib/statsample/factor/parallelanalysis.rb
================================================
module Statsample
  module Factor
    # Performs Horn's 'parallel analysis' to a principal components analysis
    # to adjust for sample bias in the retention of components. 
    # Can create the bootstrap samples using random data, using number
    # of cases and variables, parameters for actual data (mean and standard
    # deviation of each variable) or bootstrap sampling for actual data.
    # == Description
    # "PA involves the construction of a number of correlation matrices of random variables based on the same sample size and number of variables in the real data set. The average eigenvalues from the random correlation matrices are then compared to the eigenvalues from the real data correlation matrix, such that the first observed eigenvalue is compared to the first random eigenvalue, the second observed eigenvalue is compared to the second random eigenvalue, and so on." (Hayton, Allen & Scarpello, 2004, p.194)
    # == Usage
    # *With real dataset*
    #   # ds should be any valid dataset
    #   pa=Statsample::Factor::ParallelAnalysis.new(ds, :iterations=>100, :bootstrap_method=>:data)
    #
    # *With number of cases and variables*
    #   pa=Statsample::Factor::ParallelAnalysis.with_random_data(100,8)
    # 
    # == Reference
    # * Hayton, J., Allen, D. & Scarpello, V.(2004). Factor Retention Decisions in Exploratory Factor Analysis: a Tutorial on Parallel Analysis. <i>Organizational Research Methods, 7</i> (2), 191-205.
    # * O'Connor, B. (2000). SPSS and SAS programs for determining the number of components using parallel analysis and Velicer's MAP test. Behavior Research Methods, Instruments, & Computers, 32(3), 396-402.
    # * Liu, O., & Rijmen, F. (2008). A modified procedure for parallel analysis of ordered categorical data. Behavior Research Methods, 40(2), 556-562.

    class ParallelAnalysis
      # Creates an analysis without actual data: builds a mock dataset
      # that only knows its fields ("v1".."v<vars>") and its number of
      # cases, and forces :bootstrap_method to :random and :no_data to true.
      def self.with_random_data(cases,vars,opts=Hash.new)
        require 'ostruct'
        mock_ds = OpenStruct.new
        mock_ds.fields = vars.times.map { |idx| "v#{idx.succ}" }
        mock_ds.cases = cases
        forced = {:bootstrap_method => :random, :no_data => true}
        new(mock_ds, opts.merge(forced))
      end
      include DirtyMemoize
      include Summarizable
      # Number of random sets to produce. 50 by default
      attr_accessor :iterations
      # Name of analysis
      attr_accessor :name
      # Dataset. You could use mock vectors when use bootstrap method
      attr_reader :ds
      # Bootstrap method. <tt>:random</tt> used by default
      # * <tt>:random</tt>: uses number of variables and cases for the dataset
      # * <tt>:data</tt> : sample with replacement from actual data.
      attr_accessor :bootstrap_method
      # Uses smc on diagonal of matrixes, to perform simulation
      # of a Principal Axis analysis.
      # By default, false.
      attr_accessor :smc
      # Percentil over bootstrap eigenvalue should be accepted. 95 by default
      attr_accessor :percentil
      # Correlation matrix used with :raw_data . <tt>:correlation_matrix</tt> used by default
      attr_accessor :matrix_method
      # Number of eigenvalues to calculate. Should be set for 
      # Principal Axis Analysis.
      attr_accessor :n_variables
      # Dataset with bootstrapped eigenvalues
      attr_reader :ds_eigenvalues
      # Perform analysis without actual data. 
      attr_accessor :no_data
      # Show extra information if true
      attr_accessor :debug
      attr_accessor :use_gsl
      # Creates the analysis for dataset +ds+ (any object answering
      # +fields+ and +cases+ when no actual data is used).
      #
      # Options and defaults: :name, :iterations (50), :bootstrap_method
      # (:random), :smc (false), :percentil (95), :debug (false),
      # :no_data (false) and :matrix_method (:correlation_matrix).
      # GSL is used when Statsample.has_gsl? says it is available.
      def initialize(ds, opts=Hash.new)
        @ds=ds
        @fields=@ds.fields
        @n_variables=@fields.size
        @n_cases=ds.cases
        opts_default={
          :name=>_("Parallel Analysis"),
          :iterations=>50, # See Liu and Rijmen (2008)
          :bootstrap_method => :random,
          :smc=>false,
          :percentil=>95, 
          :debug=>false,
          :no_data=>false,
          :matrix_method=>:correlation_matrix
        }
        @use_gsl=Statsample.has_gsl?
        @opts=opts_default.merge(opts)
        # The :parameters method always works with a correlation matrix.
        # (This must be an assignment: a comparison would be discarded.)
        @opts[:matrix_method]=:correlation_matrix if @opts[:bootstrap_method]==:parameters
        # Only the known options are sent to their writers.
        opts_default.keys.each {|k| send("#{k}=", @opts[k]) }
      end
      # Number of factors to retain: how many leading eigenvalues of the
      # data are positive and greater than the chosen percentile of the
      # bootstrapped eigenvalues on the same position. Counting stops on
      # the first eigenvalue that fails.
      def number_of_factors
        retained = ds_eigenvalues.fields.each_with_index.take_while do |field, idx|
          @original[idx] > 0 and @original[idx] > ds_eigenvalues[field].percentil(percentil)
        end
        retained.size
      end
      # Adds to builder +g+ a section with the parameters of the analysis
      # and a table of eigenvalues. Without data, the table only has the
      # generated eigenvalues; with data, it also has the data
      # eigenvalues, the number of factors to preserve and a "preserve?"
      # mark for each eigenvalue.
      def report_building(g) #:nodoc:
        g.section(:name=>@name) do |s|
          s.text(_("Bootstrap Method: %s") % bootstrap_method)
          s.text(_("Uses SMC: %s") % (smc ? _("Yes") : _("No")))
          s.text(_("Correlation Matrix type : %s") % matrix_method)
          s.text(_("Number of variables: %d") % @n_variables)
          s.text(_("Number of cases: %d") % @n_cases)
          s.text(_("Number of iterations: %d") % @iterations)
          if @no_data
            header = [_("n"), _("generated eigenvalue"), "p.#{percentil}"]
            s.table(:name=>_("Eigenvalues"), :header=>header) do |t|
              ds_eigenvalues.fields.each_with_index do |field, idx|
                simulated = ds_eigenvalues[field]
                t.row([idx+1, "%0.4f" % simulated.mean, "%0.4f" % simulated.percentil(percentil)])
              end
            end
          else
            s.text(_("Number or factors to preserve: %d") % number_of_factors)
            header = [_("n"), _("data eigenvalue"), _("generated eigenvalue"), "p.#{percentil}", _("preserve?")]
            s.table(:name=>_("Eigenvalues"), :header=>header) do |t|
              ds_eigenvalues.fields.each_with_index do |field, idx|
                simulated = ds_eigenvalues[field]
                threshold = simulated.percentil(percentil)
                # The mark is set for each row on its own.
                mark = (threshold > 0 and @original[idx] > threshold) ? "Yes" : ""
                t.row([idx+1, "%0.4f" % @original[idx], "%0.4f" % simulated.mean, "%0.4f" % threshold, mark])
              end
            end
          end
        end
      end
      # Perform calculation. Shouldn't be called directly for the user.
      #
      # Sets @original (eigenvalues of the actual data, unless no_data) and
      # @ds_eigenvalues (one case per iteration, one field per eigenvalue).
      def compute
        
        
        # Eigenvalues of the real data, using the selected matrix method.
        @original=Statsample::Bivariate.send(matrix_method, @ds).eigenvalues unless no_data        
        # One scale field per eigenvalue: ev_00001, ev_00002, ...
        @ds_eigenvalues=Statsample::Dataset.new((1..@n_variables).map{|v| "ev_%05d" % v})
        @ds_eigenvalues.fields.each {|f| @ds_eigenvalues[f].type=:scale}
        # NOTE(review): this tests :parameter (singular) while initialize
        # tests :parameters; only :random uses the generator below.
        if bootstrap_method==:parameter or bootstrap_method==:random
          rng = Distribution::Normal.rng
        end
        
        @iterations.times do |i|
          begin
            puts "#{@name}: Iteration #{i}" if $DEBUG or debug
            # Create a dataset of dummy values
            ds_bootstrap=Statsample::Dataset.new(@ds.fields)
            
            @fields.each do |f|
              if bootstrap_method==:random
                # Random values obtained from the generator
                ds_bootstrap[f]=@n_cases.times.map {|c| rng.call}.to_scale
              elsif bootstrap_method==:data
                # Resample the actual vector with replacement
                ds_bootstrap[f]=ds[f].sample_with_replacement(@n_cases)
              else
                raise "bootstrap_method doesn't recogniced"
              end
            end
            ds_bootstrap.update_valid_data
            
            matrix=Statsample::Bivariate.send(matrix_method, ds_bootstrap)
            matrix=matrix.to_gsl if @use_gsl
            if smc
                # Replace the diagonal with 1 - 1/(diagonal of the inverse)
                smc_v=matrix.inverse.diagonal.map{|ii| 1-(1.quo(ii))}
                smc_v.each_with_index do |v,ii| 
                  matrix[ii,ii]=v
                end
            end
            ev=matrix.eigenvalues
            @ds_eigenvalues.add_case_array(ev)
          rescue Statsample::Bivariate::Tetrachoric::RequerimentNotMeet => e
            puts "Error: #{e}" if $DEBUG
            # Repeat this same iteration with a new sample. There is no
            # limit on the number of repetitions.
            redo
          end
        end
        @ds_eigenvalues.update_valid_data
      end
      dirty_memoize :number_of_factors, :ds_eigenvalues
      dirty_writer :iterations, :bootstrap_method, :percentil, :smc
    end
  end
end


================================================
FILE: lib/statsample/factor/pca.rb
================================================
# encoding: UTF-8
module Statsample
module Factor
  # Principal Component Analysis (PCA) of a covariance or 
  # correlation matrix.. 
  #
  # NOTE: Sign of second and later eigenvalues could be different
  # using Ruby or GSL, so values for PCs and component matrix
  # should differ, because extendmatrix and gsl's methods to calculate
  # eigenvectors are different. Using R is worse, cause first 
  # eigenvector could have negative values!
  # For Principal Axis Analysis, use Statsample::Factor::PrincipalAxis
  # 
  # == Usage:
  #   require 'statsample'
  #   a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
  #   b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_scale
  #   ds={'a'=>a,'b'=>b}.to_dataset
  #   cor_matrix=Statsample::Bivariate.correlation_matrix(ds)
  #   pca=Statsample::Factor::PCA.new(cor_matrix)
  #   pca.m
  #   => 1
  #   pca.eigenvalues
  #   => [1.92592927269225, 0.0740707273077545]
  #   pca.component_matrix
  #   => GSL::Matrix
  #   [  9.813e-01 
  #     9.813e-01 ]
  #   pca.communalities
  #   => [0.962964636346122, 0.962964636346122]
  #
  # == References:
  # * SPSS Manual
  # * Smith, L. (2002). A tutorial on Principal Component Analysis. Available on http://courses.eas.ualberta.ca/eas570/pca_tutorial.pdf 
  # * Härdle, W. & Simar, L. (2003). Applied Multivariate Statistical Analysis. Springer
  # 
  class PCA
    include Summarizable
    # Name of analysis
    attr_accessor :name

    # Number of factors. Set by default to the number of factors
    # with eigen values > 1
    attr_accessor :m
    # Use GSL if available
    attr_accessor :use_gsl
    # Add to the summary a rotation report
    attr_accessor :summary_rotation
    # Add to the summary a parallel analysis report
    attr_accessor :summary_parallel_analysis
    # Type of rotation. By default, Statsample::Factor::Rotation::Varimax
    attr_accessor :rotation_type
    attr_accessor :matrix_type
    # Creates the analysis for +matrix+ and calculates its eigenpairs.
    #
    # * matrix: covariance or correlation matrix. If it answers +_type+,
    #           that value is the matrix type; :correlation otherwise. If it
    #           answers +fields+, those are the variable names;
    #           "V1", "V2"... otherwise.
    # * opts:   every key the object responds to is assigned through its
    #           writer (e.g. :m, :name, :use_gsl, :rotation_type).
    #
    # If :m is not given, it is set to the number of eigenvalues >= 1.
    def initialize(matrix, opts=Hash.new)
      @use_gsl=nil
      @name=_("Principal Component Analysis")
      @matrix=matrix
      @n_variables=@matrix.column_size

      @matrix_type = @matrix.respond_to?(:_type) ? @matrix._type : :correlation

      @m=nil

      @rotation_type=Statsample::Factor::Varimax

      opts.each{|k,v|
        self.send("#{k}=",v) if self.respond_to? k
      }
      # Use GSL when available, unless the caller decided otherwise.
      if @use_gsl.nil?
        @use_gsl=Statsample.has_gsl?
      end
      if @matrix.respond_to? :fields
        @variables_names=@matrix.fields
      else
        @variables_names=@n_variables.times.map {|i| "V#{i+1}"}
      end
      calculate_eigenpairs

      if @m.nil?
        # Set number of factors with eigenvalues >= 1
        @m=@eigenpairs.find_all {|ev,ec| ev>=1.0}.size
      end

    end
    # Returns a new object of class @rotation_type, created with the
    # component matrix.
    def rotation
      @rotation_type.new(component_matrix)
    end
    # Sum of all eigenvalues.
    def total_eigenvalues
      total = 0
      eigenvalues.each { |value| total += value }
      total
    end
    # Stores on @ds a dataset where, for each element of
    # @original_ds.factors, the vector has its mean subtracted.
    # NOTE(review): @original_ds is not assigned anywhere in this class and
    # no method of the class calls this one -- confirm before using it.
    def create_centered_ds
      h={}
      @original_ds.factors.each {|f|
        mean=@original_ds[f].mean
        h[f]=@original_ds[f].recode {|c| c-mean}
      }
      @ds=h.to_dataset
    end
    
    # Feature matrix for +m+ factors
    # Returns +m+ eigenvectors as columns.
    # So, i=variable, j=component
    # If +m+ is nil, the number of factors of the object (@m) is used.
    # The result is a GSL::Matrix when GSL is in use, a ::Matrix otherwise.
    def feature_matrix(m=nil)
      m||=@m
      if @use_gsl
        # Zero matrix of variables x components, filled column by column.
        omega_m=GSL::Matrix.zeros(@n_variables,m)
        ev=eigenvectors
        m.times do |i|
          omega_m.set_column(i,ev[i])
        end
        omega_m
      else
        omega_m=::Matrix.build(@n_variables, m) {0}
        m.times do |i|
          # NOTE(review): column= is not defined here; presumably it comes
          # from the extendmatrix library -- confirm.
          omega_m.column= i, @eigenpairs[i][1]
        end
        omega_m
      end
    end
    # Returns Principal Components for +input+ matrix or dataset
    # The number of PC to return is equal to parameter +m+. 
    # If +m+ isn't set, m set to number of PCs selected at object creation.
    # Use covariance matrix
    #
    # +input+ should answer +to_gsl+ (when GSL is in use) or +to_matrix+.
    # Raises RuntimeError if the number of columns of the data differs from
    # the number of variables of the analysis.
    # Returns a dataset with fields PC_1 ... PC_m.
    def principal_components(input, m=nil)
      if @use_gsl
        data_matrix=input.to_gsl
      else
        data_matrix=input.to_matrix
      end
      m||=@m
      
      raise "data matrix variables<>pca variables" if data_matrix.column_size!=@n_variables
      
      fv=feature_matrix(m)
      # Multiply the transposed feature matrix by the transposed data, and
      # transpose the product back.
      pcs=(fv.transpose*data_matrix.transpose).transpose
      
      pcs.extend Statsample::NamedMatrix
      pcs.fields_y=m.times.map {|i| "PC_%d" % (i+1)}
      pcs.to_dataset
    end
    # Component matrix for +m+ factors. Delegates on the method
    # component_matrix_<matrix_type> (covariance or correlation).
    def component_matrix(m=nil)
      send("component_matrix_#{matrix_type}", m)
    end
    # Matrix with correlations between components and
    # variables. Based on Härdle & Simar (2003, p.243)
    #
    # If +m+ is nil, the number of factors of the object (@m) is used.
    # Raises RuntimeError if +m+ is lower than 1.
    def component_matrix_covariance(m=nil)
      m||=@m
      raise "m should be > 0" if m<1
      ff=feature_matrix(m)
      cm=::Matrix.build(@n_variables, m) {0}
      @n_variables.times {|i|
        m.times {|j|
          # Eigenvector element scaled by sqrt(eigenvalue j / diagonal i)
          cm[i,j]=ff[i,j] * Math.sqrt(eigenvalues[j] / @matrix[i,i])
        }
      }
      # Name the rows after the variables and the columns after the components.
      cm.extend NamedMatrix
      cm.name=_("Component matrix (from covariance)")
      cm.fields_x = @variables_names
      cm.fields_y = m.times.map {|i| "PC_%d" % (i+1)}
      
      cm
    end
    # Matrix with correlations between components and
    # variables
    #
    # If +m+ is nil, the number of factors of the object (@m) is used.
    # Raises RuntimeError if +m+ is lower than 1.
    def component_matrix_correlation(m=nil)
      m||=@m
      raise "m should be > 0" if m<1
      omega_m=::Matrix.build(@n_variables, m) {0}
      gammas=[]
      m.times {|i|
        # Column i: eigenvector i. gammas: square root of each eigenvalue.
        omega_m.column=i, @eigenpairs[i][1]
        gammas.push(Math::sqrt(@eigenpairs[i][0]))
      }
      gamma_m=::Matrix.diagonal(*gammas)
      # Eigenvectors scaled by the square root of their eigenvalue.
      cm=(omega_m*(gamma_m)).to_matrix
      
      cm.extend CovariateMatrix
      cm.name=_("Component matrix")
      cm.fields_x = @variables_names
      cm.fields_y = m.times.map {|i| "PC_%d" % (i+1)}
      cm
    end
    # Array with the communality of each variable for +m+ factors (@m when
    # +m+ is nil): the sum, over the first +m+ eigenpairs, of the absolute
    # eigenvalue times the squared element of the eigenvector.
    def communalities(m=nil)
      m ||= @m
      (0...@n_variables).map do |variable|
        (0...m).inject(0) do |sum, component|
          pair = @eigenpairs[component]
          sum + pair[0].abs * pair[1][variable]**2
        end
      end
    end
    # Array with eigenvalues (first element of each eigenpair).
    def eigenvalues
      @eigenpairs.map { |pair| pair[0] }
    end
    # Array with eigenvectors (second element of each eigenpair), converted
    # with +to_gsl+ when GSL is in use and with +to_vector+ otherwise.
    def eigenvectors
      if @use_gsl
        @eigenpairs.map { |pair| pair[1].to_gsl }
      else
        @eigenpairs.map { |pair| pair[1].to_vector }
      end
    end
    # Stores on @eigenpairs the eigenpairs of the matrix, calculated by GSL
    # when it is in use and by the Ruby implementation otherwise.
    def calculate_eigenpairs
      @eigenpairs = if @use_gsl
        @matrix.to_gsl.eigenpairs
      else
        @matrix.to_matrix.eigenpairs_ruby
      end
    end
  
    
    # Adds to +builder+ a section with the number of factors, the
    # communalities of each variable, the variance explained by each
    # component, the component matrix and, if summary_rotation is set,
    # the rotation.
    def report_building(builder) # :nodoc:
      builder.section(:name=>@name) do |generator|
        generator.text(_("Number of factors: %d") % m)
        generator.table(:name=>_("Communalities"), :header=>[_("Variable"),_("Initial"),_("Extraction"), _("%")]) do |t|
          communalities(m).each_with_index do |extracted, idx|
            initial = @matrix[idx,idx]
            percent = extracted*100.quo(initial)
            t.row([@variables_names[idx], "%0.3f" % initial, "%0.3f" % extracted, "%0.3f" % percent])
          end
        end
        total = total_eigenvalues
        generator.table(:name=>_("Total Variance Explained"), :header=>[_("Component"), _("E.Total"), _("%"), _("Cum. %")]) do |t|
          cumulative = 0
          eigenvalues.each_with_index do |eigenvalue, idx|
            cumulative += eigenvalue
            t.row([
              _("Component %d") % (idx+1),
              "%0.3f" % eigenvalue,
              "%0.3f%%" % (eigenvalue*100.quo(total)),
              "%0.3f" % (cumulative*100.quo(total))
            ])
          end
        end

        generator.parse_element(component_matrix(m))

        generator.parse_element(rotation) if summary_rotation
      end
    end
    private :calculate_eigenpairs, :create_centered_ds
  end
end
end


================================================
FILE: lib/statsample/factor/principalaxis.rb
================================================
module Statsample
module Factor
  # Principal Axis Analysis for a covariance or correlation matrix. 
  #
  # For PCA, use Statsample::Factor::PCA
  # 
  # == Usage:
  #   require 'statsample'
  #   a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
  #   b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_scale
  #   ds={'a'=>a,'b'=>b}.to_dataset
  #   cor_matrix=Statsample::Bivariate.correlation_matrix(ds)
  #   pa=Statsample::Factor::PrincipalAxis.new(cor_matrix)
  #   pa.iterate(1)
  #   pa.m
  #   => 1
  #   pa.component_matrix
  #   => GSL::Matrix
  #   [  9.622e-01 
  #      9.622e-01 ]
  #   pa.communalities
  #   => [0.962964636346122, 0.962964636346122]
  #
  # == References:
  # * SPSS Manual
  # * Smith, L. (2002). A tutorial on Principal Component Analysis. Available on http://courses.eas.ualberta.ca/eas570/pca_tutorial.pdf 
  #   
  class PrincipalAxis
    include DirtyMemoize
    include Summarizable
    # Name of analysis
    attr_accessor :name

    # Number of factors. Set by default to the number of factors
    # with eigenvalues > 1 (Kaiser criterion).
    # 
    # _Warning:_ Kaiser criterion overfactors! Give yourself some time
    # and use Horn's Parallel Analysis.
    #
    attr_accessor :m
    
    # Number of iterations required to converge
    attr_reader :iterations
    
    # Initial eigenvalues 
    attr_reader :initial_eigenvalues
    
    # Tolerance for iterations
    attr_accessor :epsilon
    
    # Use SMC(squared multiple correlations) as diagonal. If false, use 1
    attr_accessor :smc
    
    # Maximum number of iterations
    attr_accessor :max_iterations
    
    # Eigenvalues of factor analysis
    attr_reader :eigenvalues
    
    # Minimum difference between successive iterations on sum of communalities
    DELTA=1e-3
    # Maximum number of iterations
    MAX_ITERATIONS=25
    
    # Creates the analysis for +matrix+ (a covariance or correlation matrix).
    #
    # +opts+ is a Hash of option name => value; each entry is applied through
    # the matching writer when the object responds to the option name.
    # When no number of factors is given, it is taken from a PCA of the
    # same matrix.
    def initialize(matrix, opts=Hash.new)
      @matrix = matrix
      @fields = if @matrix.respond_to?(:fields)
                  @matrix.fields
                else
                  Array.new(@matrix.row_size) { |idx| _("Variable %d") % (idx + 1) }
                end
      @n_variables = @matrix.row_size

      # Defaults, possibly overridden by opts below
      @name = ""
      @m = nil
      @initial_eigenvalues = nil
      @initial_communalities = nil
      @component_matrix = nil
      @delta = DELTA
      @smc = true
      @max_iterations = MAX_ITERATIONS

      opts.each do |key, value|
        send("#{key}=", value) if respond_to?(key)
      end

      @variables_names = if @matrix.respond_to?(:fields)
                           @matrix.fields
                         else
                           Array.new(@n_variables) { |idx| "V#{idx + 1}" }
                         end

      @m = PCA.new(::Matrix.rows(@matrix.to_a)).m if @m.nil?

      # Marks that no iteration has been run yet
      @clean = true
    end
    # Communality for all variables given m factors
    def communalities(m=nil)
      # Reuse the stored result only when the requested number of factors
      # equals the current one and an iteration has already been run.
      needs_iteration = (m != @m) || @clean
      return @communalities unless needs_iteration

      iterate(m)
      raise "Can't calculate comunality" if @communalities.nil?
      @communalities
    end
    # Component matrix for m factors
    def component_matrix(m=nil)
      # Iterate again when a different number of factors is requested or
      # when no iteration has been run yet.
      iterate(m) if (m != @m) || @clean
      @component_matrix
    end
    # Iterate to find the factors
    def iterate(m=nil)
      # Extracts +m+ factors (defaults to the current @m, which is updated).
      #
      # Starting from the initial communalities, the diagonal of a working
      # copy of the matrix is replaced by the latest communalities and a PCA
      # is run on it, until the sum of communalities changes by less than the
      # tolerance or +max_iterations+ passes were made.
      #
      # Sets @communalities, @eigenvalues, @initial_eigenvalues, @iterations
      # and @component_matrix.
      # Raises RuntimeError when a communality greater than 1 is found.
      @clean=false
      m||=@m
      @m=m
      # +epsilon+ is the public tolerance accessor; honor it when it was set
      # and fall back to the DELTA-based default otherwise.
      tolerance = @epsilon.nil? ? @delta : @epsilon
      work_matrix=@matrix.to_a

      prev_com=initial_communalities

      pca=PCA.new(::Matrix.rows(work_matrix))
      @initial_eigenvalues=pca.eigenvalues
      prev_sum=prev_com.inject(0) {|ac,v| ac+v}
      @iterations=0
      @max_iterations.times do |i|
        $stderr.puts "#{@name}: Iteration #{i}" if $DEBUG
        @iterations+=1
        # Replace the diagonal with the latest communality estimates
        prev_com.each_with_index{|v,it|
          work_matrix[it][it]=v
        }
        pca=PCA.new(::Matrix.rows(work_matrix))
        @communalities=pca.communalities(m)
        @eigenvalues=pca.eigenvalues
        com_sum = @communalities.inject(0) {|ac,v| ac+v}

        break if (com_sum-prev_sum).abs < tolerance
        @communalities.each_with_index do |v2,i2|
          raise "Variable #{i2} with communality > 1" if v2>1.0
        end
        prev_sum=com_sum
        prev_com=@communalities
      end
      @component_matrix=pca.component_matrix(m)
      @component_matrix.extend CovariateMatrix
      @component_matrix.name=_("Factor Matrix")
      @component_matrix.fields_x = @variables_names
      @component_matrix.fields_y = m.times.map {|i| "factor_#{i+1}"}

    end
    alias :compute :iterate 
    
    # Initial communality estimates, computed once and then reused.
    #
    # With +smc+ enabled each estimate is 1 - 1/d, where d is the matching
    # diagonal entry of the inverse of the matrix (based on O'Connor, 2000).
    # Otherwise every estimate is 1.0.
    def initial_communalities
      return @initial_communalities unless @initial_communalities.nil?

      @initial_communalities =
        if @smc
          @matrix.inverse.diagonal.map { |inverse_diagonal| 1-(1.quo(inverse_diagonal)) }
        else
          [1.0]*@matrix.column_size
        end
    end
    
    
    # Returns two matrixes from a correlation matrix
    # with regressors correlation matrix and criteria xy
    # matrix.
    def self.separate_matrices(matrix, y)
      # Indexes of everything except the criterion variable +y+
      other_columns = (0...matrix.column_size).reject { |col| col == y }
      other_rows = (0...matrix.row_size).reject { |row| row == y }

      # Column matrix with the values of row +y+ for the remaining variables
      rxy = Matrix.columns([other_columns.map { |col| matrix[y, col] }])

      # Submatrix without row and column +y+; row_size drives both
      # dimensions, so the input is expected to be square.
      rxx = Matrix.rows(other_rows.map { |row| other_rows.map { |col| matrix[row, col] } })

      [rxx, rxy]
    end
    # Writes the analysis into +generator+: number of factors, iterations,
    # a table of initial and extracted communalities, a table comparing
    # initial eigenvalues with the extracted ones (only the first @m rows
    # carry extraction figures) and the factor matrix.
    def report_building(generator)
      iterate if @clean
      generator.section(:name=>@name) do |s|
        s.text(format(_("Number of factors: %d"), m))
        s.text(format(_("Iterations: %d"), @iterations))

        communalities_header = [_("Variable"), _("Initial"), _("Extraction")]
        s.table(:name=>_("Communalities"), :header=>communalities_header) do |t|
          communalities(m).each_with_index do |com, i|
            t.row([@fields[i], format("%0.4f", initial_communalities[i]), format("%0.3f", com)])
          end
        end

        variance_header = [
          _("Factor"), _("I.E.Total"), _("I.E. %"), _("I.E.Cum. %"),
          _("S.L.Total"), _("S.L. %"), _("S.L.Cum. %")
        ]
        s.table(:name=>_("Total Variance"), :header=>variance_header) do |t|
          ac_eigen = 0
          ac_i_eigen = 0
          @initial_eigenvalues.each_with_index do |eigenvalue, i|
            ac_i_eigen += eigenvalue
            ac_eigen += @eigenvalues[i]

            initial_part = [
              format(_("Factor %d"), i+1),
              format("%0.3f", eigenvalue),
              format("%0.3f%%", eigenvalue*100.quo(@n_variables)),
              format("%0.3f", ac_i_eigen*100.quo(@n_variables))
            ]
            extraction_part =
              if i < @m
                [
                  format("%0.3f", @eigenvalues[i]),
                  format("%0.3f%%", @eigenvalues[i]*100.quo(@n_variables)),
                  format("%0.3f", ac_eigen*100.quo(@n_variables))
                ]
              else
                ["", "", ""]
              end

            t.row(initial_part + extraction_part)
          end
        end

        s.parse_element(component_matrix)
      end
    end
    
    dirty_writer :max_iterations, :epsilon, :smc
    dirty_memoize :eigenvalues, :iterations, :initial_eigenvalues

  end
  
end
end


================================================
FILE: lib/statsample/factor/rotation.rb
================================================
module Statsample
module Factor
  # Base class for component matrix rotation.
  #
  # == Reference:
  # * SPSS Manual
  # * Lin, J. (2007). VARIMAX_K58 [Source code]. [http://www.johnny-lin.com/idl_code/varimax_k58.pro]
  # 
  # Use subclasses Varimax, Equimax or Quartimax for desired type of rotation
  #   Use:
  #   a = Matrix[ [ 0.4320,  0.8129,  0.3872] 
  #     , [ 0.7950, -0.5416,  0.2565]  
  #     , [ 0.5944,  0.7234, -0.3441]  
  #     , [ 0.8945, -0.3921, -0.1863] ]
  #   rotation = Statsample::Factor::Varimax.new(a)
  #   rotation.iterate
  #   p rotation.rotated
  #   p rotation.component_transformation_matrix
  # 
  class Rotation
    EPSILON=1e-15
    MAX_ITERATIONS=25
    include Summarizable
    include DirtyMemoize
    attr_reader :iterations, :rotated, :component_transformation_matrix, :h2
    # Maximum number of iterations    
    attr_accessor :max_iterations
    # Maximum precision    
    attr_accessor :epsilon
    attr_accessor :use_gsl
    dirty_writer :max_iterations, :epsilon
    dirty_memoize :iterations, :rotated, :component_transformation_matrix, :h2
    
    # Prepares the rotation of +matrix+ (rows are variables, columns are
    # factors).
    #
    # +opts+ is a Hash of option name => value; each entry is applied through
    # the matching writer when the object responds to the option name.
    def initialize(matrix, opts=Hash.new)
      @name = _("%s rotation") % rotation_name
      @matrix = matrix
      @n = @matrix.row_size # Variables, p on original
      @m = @matrix.column_size # Factors, r on original
      @component_transformation_matrix = nil
      @max_iterations = MAX_ITERATIONS
      @epsilon = EPSILON
      @rotated = nil

      # Row sums of the squared entries: one value per variable
      squared = @matrix.collect { |value| value**2 }
      ones = Matrix.column_vector([1]*@m)
      @h2 = (squared * ones).column(0).to_a

      @use_gsl = Statsample.has_gsl?
      opts.each do |key, value|
        send("#{key}=", value) if respond_to?(key)
      end
    end
    # Adds a section to +g+ holding the rotated matrix followed by the
    # component transformation matrix.
    def report_building(g)
      g.section(:name=>@name) do |section|
        %i[rotated component_transformation_matrix].each do |reader|
          section.parse_element(send(reader))
        end
      end
    end
    alias_method :communalities, :h2
    alias_method :rotated_component_matrix, :rotated
    # Generic entry point that simply runs #iterate and returns its result.
    # NOTE(review): presumably the hook used by DirtyMemoize to refresh the
    # memoized readers -- confirm against that module.
    def compute
      iterate
    end
    # Start iteration 
    def iterate
      # Rotates the loading matrix pair of factors by pair of factors.
      #
      # Rows are first divided by the square root of their communality (@h2)
      # and multiplied back at the end. A pass visits every pair (i, j) of
      # factors, computes an angle from the subclass hooks x(a,b,c,d) and
      # y(a,b,c,d) and rotates the pair when sin(|angle|) >= epsilon. The
      # process stops when a whole pass rotates nothing, or after
      # +max_iterations+ passes.
      #
      # Sets @rotated, @component_transformation_matrix and @iterations;
      # returns @rotated.
      k_matrix=@use_gsl ? GSL::Matrix : ::Matrix
      t=k_matrix.identity(@m)
      loadings=(@use_gsl ? @matrix.to_gsl : @matrix.dup)
      h=k_matrix.diagonal(*@h2).collect {|c| Math::sqrt(c)}
      h_inverse=h.collect {|c| c!=0 ? 1/c : 0 }
      bh=h_inverse * loadings
      @not_converged=true
      @iterations=0
      while @not_converged
        # Never run more than max_iterations passes
        break if @iterations>=@max_iterations
        @iterations+=1
        num_pairs=@m*(@m-1).quo(2)
        # With fewer than two factors there is no pair to rotate, so the
        # solution is already final.
        @not_converged=false if num_pairs==0
        (0..(@m-2)).each do |i| #+ go through factor index 0:r-1-1 (begin)
          ((i+1)..(@m-1)).each do |j| #+ pair i to "rest" of factors (begin)

            xx = bh.column(i)
            yy = bh.column(j)
            tx = t.column(i)
            ty = t.column(j)

            uu = @n.times.collect {|var_i| xx[var_i]**2-yy[var_i]**2}
            vv = @n.times.collect {|var_i| 2*xx[var_i]*yy[var_i]}

            a  = @n.times.inject(0) {|ac,var_i| ac+ uu[var_i] }
            b  = @n.times.inject(0) {|ac,var_i| ac+ vv[var_i] }
            c  = @n.times.inject(0) {|ac,var_i| ac+ (uu[var_i]**2 - vv[var_i]**2) }
            d  = @n.times.inject(0) {|ac,var_i| ac+ (2*uu[var_i]*vv[var_i]) }
            num=x(a,b,c,d)
            den=y(a,b,c,d)
            phi=Math::atan2(num,den) / 4.0

            if(Math::sin(phi.abs) >= @epsilon)
              xx_rot=( Math::cos(phi)*xx)+(Math::sin(phi)*yy)
              yy_rot=((-Math::sin(phi))*xx)+(Math::cos(phi)*yy)

              tx_rot=( Math::cos(phi)*tx)+(Math::sin(phi)*ty)
              ty_rot=((-Math::sin(phi))*tx)+(Math::cos(phi)*ty)

              # Write the rotated columns back through plain arrays and
              # rebuild the matrices.
              bh=bh.to_a
              @n.times {|row_i|
                bh[row_i][i] = xx_rot[row_i]
                bh[row_i][j] = yy_rot[row_i]
              }
              t=t.to_a
              @m.times {|row_i|
                t[row_i][i]=tx_rot[row_i]
                t[row_i][j]=ty_rot[row_i]
              }
              bh=k_matrix.[](*bh)
              t=k_matrix.[](*t)
            else
              # This pair needed no rotation; when that holds for every
              # pair of the pass the solution has converged.
              num_pairs=num_pairs-1
              @not_converged=false if num_pairs==0
            end # if
          end #j
        end #i
      end # while
      @rotated=h*bh
      @rotated.extend CovariateMatrix
      @rotated.name=_("Rotated Component matrix")

      if @matrix.respond_to? :fields_x
        @rotated.fields_x = @matrix.fields_x
      else
        @rotated.fields_x = @n.times.map {|i| "var_#{i+1}"}
      end
      if @matrix.respond_to? :fields_y
        @rotated.fields_y = @matrix.fields_y
      else
        @rotated.fields_y = @m.times.map {|i| "var_#{i+1}"}
      end


      @component_transformation_matrix=t
      @component_transformation_matrix.extend CovariateMatrix
      @component_transformation_matrix.name=_("Component transformation matrix")

      if @matrix.respond_to? :fields_y
        @component_transformation_matrix.fields = @matrix.fields_y

      else
        @component_transformation_matrix.fields = @m.times.map {|i| "var_#{i+1}"}
      end

      @rotated
    end

  end
  # Rotation whose angle terms are those of the Varimax criterion.
  class Varimax < Rotation
    # Numerator of the rotation angle for a pair of factors.
    def x(a,b,c,d)
      cross_term = 2*a*b
      d-(cross_term / @n.to_f)
    end
    # Denominator of the rotation angle for a pair of factors.
    def y(a,b,c,d)
      square_difference = a**2-b**2
      c-(square_difference / @n.to_f)
    end
    # Name used to build the report title.
    def rotation_name
      "Varimax"
    end
  end
  # Rotation whose angle terms are those of the Equimax criterion; the
  # correction terms are weighted by the number of factors (@m).
  class Equimax < Rotation
    # Numerator of the rotation angle for a pair of factors.
    def x(a,b,c,d)
      cross_term = @m*a*b
      d-(cross_term / @n.to_f)
    end
    # Denominator of the rotation angle for a pair of factors.
    def y(a,b,c,d)
      square_difference = a**2-b**2
      c-@m*(square_difference / (2*@n.to_f))
    end
    # Name used to build the report title.
    def rotation_name
      "Equimax"
    end

  end
  # Rotation whose angle terms are those of the Quartimax criterion: the
  # sums +d+ and +c+ are used without any correction.
  class Quartimax < Rotation
    # Numerator of the rotation angle for a pair of factors.
    def x(_a, _b, _c, d)
      d
    end
    # Denominator of the rotation angle for a pair of factors.
    def y(_a, _b, c, _d)
      c
    end
    # Name used to build the report title.
    def rotation_name
      "Quartimax"
    end

  end
end
end


================================================
FILE: lib/statsample/factor.rb
================================================
require 'statsample/factor/rotation'
require 'statsample/factor/pca'
require 'statsample/factor/principalaxis'
require 'statsample/factor/parallelanalysis'
require 'statsample/factor/map'

module Statsample
  # Factor Analysis toolbox.
  # * Classes for Extraction of factors: 
  #   * Statsample::Factor::PCA
  #   * Statsample::Factor::PrincipalAxis
  # * Classes for Rotation of factors: 
  #   * Statsample::Factor::Varimax
  #   * Statsample::Factor::Equimax
  #   * Statsample::Factor::Quartimax
  # * Classes for determining the number of components
  #   * Statsample::Factor::MAP
  #   * Statsample::Factor::ParallelAnalysis
  #
  # About number of components, O'Connor(2000) said:
  #  The two procedures [PA and MAP ] complement each other nicely,
  #  in that the MAP tends to err (when it does err) in the direction
  #  of underextraction, whereas parallel analysis tends to err
  #  (when it does err) in the direction of overextraction.
  #  Optimal decisions are thus likely to be made after considering
  #  the results of both analytic procedures. (p.10)

  module Factor
    # Anti-image covariance matrix.
    # Useful for inspection of desirability of data for factor analysis.
    # According to Dziuban  & Shirkey (1974, p.359): 
    #   "If this matrix does not exhibit many zero off-diagonal elements,
    #   the investigator has evidence that the correlation
    #   matrix is not appropriate for factor analysis."
    # 
    def self.anti_image_covariance_matrix(matrix)
      inverse = matrix.inverse
      # Inverse of the diagonal matrix built from the diagonal of the inverse
      scaling = Matrix.diag(*inverse.diagonal).inverse
      result = scaling * inverse * scaling
      result.extend(Statsample::CovariateMatrix)
      # Keep the variable names when the input carries them
      result.fields = matrix.fields if matrix.respond_to? :fields
      result
    end
    # Anti-image correlation matrix: the inverse of +matrix+ pre- and
    # post-multiplied by the inverse square root of the diagonal matrix
    # built from the diagonal of that inverse.
    def self.anti_image_correlation_matrix(matrix)
      source = matrix.to_matrix
      inverse = source.inverse
      scaling = Matrix.diag(*inverse.diagonal).sqrt.inverse
      result = scaling * inverse * scaling

      result.extend(Statsample::CovariateMatrix)
      # Keep the variable names when the converted matrix carries them
      result.fields = source.fields if source.respond_to? :fields
      result
    end
      
    # Kaiser-Meyer-Olkin measure of sampling adequacy for correlation matrix.
    # 
    # Kaiser's (1974, cited on Dziuban  & Shirkey, 1974) present calibration of the index is as follows :
    # * .90s—marvelous
    # * .80s— meritorious
    # * .70s—middling
    # * .60s—mediocre
    # * .50s—miserable
    # * below .50—unacceptable
    def self.kmo(matrix)
      q = anti_image_correlation_matrix(matrix)
      size = matrix.row_size
      sum_r = 0
      sum_q = 0
      # Accumulate the squared off-diagonal entries of both matrices
      size.times do |row|
        size.times do |col|
          next if row == col
          sum_r += matrix[row, col]**2
          sum_q += q[row, col]**2
        end
      end
      sum_r.quo(sum_r + sum_q)
    end
    # Kaiser-Meyer-Olkin measure of sampling adequacy for one variable.
    # 
    def self.kmo_univariate(matrix, var)
      # +var+ is either a field name (String), looked up in matrix.fields,
      # or directly the index of the variable.
      j = var
      if var.is_a? String
        raise "Matrix doesn't respond to fields" unless matrix.respond_to? :fields
        j = matrix.fields.index(var)
        raise "Matrix doesn't have field #{var}" if j.nil?
      end

      q = anti_image_correlation_matrix(matrix)
      sum_r = 0
      sum_q = 0

      # Squared entries of row j, skipping the diagonal
      matrix.row_size.times do |k|
        next if j == k
        sum_r += matrix[j,k]**2
        sum_q += q[j,k]**2
      end
      sum_r.quo(sum_r+sum_q)
    end
    
  end
end


================================================
FILE: lib/statsample/graph/boxplot.rb
================================================
require 'rubyvis'
module Statsample
  module Graph
    # = Boxplot
    # 
    # From Wikipedia:
    # In descriptive statistics, a box plot or boxplot (also known as a box-and-whisker diagram or plot) is a convenient way of graphically depicting groups of numerical data through their five-number summaries: the smallest observation (sample minimum), lower quartile (Q1), median (Q2), upper quartile (Q3), and largest observation (sample maximum). A boxplot may also indicate which observations, if any, might be considered outliers.
    # 
    # == Usage
    # === Svg output
    #  a=[1,2,3,4].to_scale
    #  b=[3,4,5,6].to_scale
    #  puts Statsample::Graph::Boxplot.new(:vectors=>[a,b]).to_svg
    # === Using ReportBuilder
    #  a=[1,2,3,4].to_scale
    #  b=[3,4,5,6].to_scale
    #  rb=ReportBuilder.new
    #  rb.add(Statsample::Graph::Boxplot.new(:vectors=>[a,b]))
    #  rb.save_html('boxplot.html')
    
    class Boxplot
      include Summarizable
      attr_accessor :name
      # Total width of Boxplot
      attr_accessor :width
      # Total height of Boxplot
      attr_accessor :height
      # Top margin
      attr_accessor :margin_top
      # Bottom margin
      attr_accessor :margin_bottom
      # Left margin
      attr_accessor :margin_left
      # Right margin
      attr_accessor :margin_right
      # Array with assignation to groups of bars
      # For example, for four vectors, 
      #   boxplot.groups=[1,2,1,3]
      # Assign same color to first and third element, and different to
      # second and fourth
      attr_accessor :groups
      # Minimum value on y-axis. Automaticly defined from data
      attr_accessor :minimum
      # Maximum value on y-axis. Automaticly defined from data
      attr_accessor :maximum
      # Vectors to box-ploting
      attr_accessor :vectors
      # The rotation angle, in radians. Text is rotated clockwise relative 
      # to the anchor location. For example, with the default left alignment, 
      # an angle of Math.PI / 2 causes text to proceed downwards. The default angle is zero.      
      attr_accessor :label_angle
      attr_reader :x_scale, :y_scale
      # Create a new Boxplot.
      # Parameters: Hash of options
      # * :vectors: Array of vectors
      # * :groups: Array of same size as :vectors:, with name of groups
      #           to colorize vectors
      def initialize(opts=Hash.new)
        # :vectors is mandatory and is removed from the given Hash; every
        # other recognised option falls back to the defaults below.
        @vectors = opts.delete :vectors
        raise "You should define vectors" if @vectors.nil?

        opts_default = {
          :name=>_("Boxplot"),
          :groups=>nil,
          :width=>400,
          :height=>300,
          :margin_top=>10,
          :margin_bottom=>20,
          :margin_left=>20,
          :margin_right=>20,
          :minimum=>nil,
          :maximum=>nil,
          :label_angle=>0
        }
        @opts = opts_default.merge(opts)
        # Only keys present in the defaults are assigned through a writer
        opts_default.keys.each do |key|
          send("#{key}=", @opts[key])
        end
      end
      
      # Returns a Rubyvis panel with the boxplot
      def rubyvis_panel # :nodoc:
        # Builds and returns (without rendering) a Rubyvis::Panel with one
        # box per vector: quartile box, median rule, whiskers and outliers.
        that=self
        
        # Y-axis bounds: explicit @minimum / @maximum when given, otherwise
        # the smallest and largest value found across all vectors.
        min,max=@minimum, @maximum
        
        min||=@vectors.map {|v| v.min}.min
        max||=@vectors.map {|v| v.max}.max
        
        
        margin_hor=margin_left + margin_right
        margin_vert=margin_top  + margin_bottom
        # One ordinal band per vector on x; linear scale on y
        x_scale = pv.Scale.ordinal(@vectors.size.times.map.to_a).split_banded(0, width-margin_hor, 4.0/5)
        y_scale=Rubyvis::Scale.linear(min,max).range(0,height-margin_vert)
        y_scale.nice
        # cache data
        
        colors=Rubyvis::Colors.category10
        
        # Summary Hash per vector: quartiles, median, whisker ends, outliers
        data=@vectors.map {|v|
          out={:percentil_25=>v.percentil(25), :median=>v.median, :percentil_75=>v.percentil(75), :name=>v.name}
          out[:iqr]=out[:percentil_75] - out[:percentil_25]
          
          # Whisker limits: one interquartile range beyond each quartile
          irq_max=out[:percentil_75] + out[:iqr]
          irq_min=out[:percentil_25] - out[:iqr]
          
          # Find the last data inside the margin
          # (min and max are the same locals used above for the axis bounds;
          # y_scale has already been built from them, so reusing them here
          # does not change the scale)
          min = out[:percentil_25]
          max = out[:percentil_75]
          
          v.each {|d|
            min=d if d < min and d > irq_min
            max=d if d > max and d < irq_max
          }
          # Whiskers!
          out[:low_whisker]=min
          out[:high_whisker]=max
          # And now, data outside whiskers
          # NOTE(review): data_with_nils presumably may contain nil values,
          # for which the comparisons below would raise -- confirm.
          out[:outliers]=v.data_with_nils.find_all {|d| d < min or d > max }
          out
        }
        

        # NOTE(review): blocks written without a block parameter (pan.rule do
        # ... end) call mark methods such as data / bottom / index without a
        # receiver, so they presumably run in the context of the mark --
        # confirm against Rubyvis.
        vis=Rubyvis::Panel.new do |pan| 
          pan.width  width  - margin_hor
          pan.height height - margin_vert
          pan.bottom margin_bottom
          pan.left   margin_left
          pan.right  margin_right
          pan.top    margin_top
           # Y axis
          pan.rule do
            data y_scale.ticks
            bottom y_scale
            stroke_style {|d| d!=0 ? "#eee" : "#000"}
            label(:anchor=>'left') do
              text y_scale.tick_format
            end
          end
          # Baseline at the bottom of the plotting area
          pan.rule do
            bottom 0
            stroke_style 'black'
          end
          
          # Labels
          
          pan.label  do |l|
            l.data data
            l.text_angle that.label_angle
            l.left  {|v| x_scale[index] }
            l.bottom(-15)
            l.text {|v,x| v[:name]}
          end
          
          # One child panel per vector, placed on its band of the x scale
          pan.panel do |bp|
            bp.data data
            bp.left {|v|  x_scale[index]}
            bp.width x_scale.range_band
            
            
            # Bar
            bp.bar do |b|
              b.bottom {|v| y_scale[v[:percentil_25]]}
              b.height {|v| y_scale[v[:percentil_75]] - y_scale[v[:percentil_25]] }
              b.line_width 1
              # Colour by group when groups were given, by position otherwise
              b.stroke_style  {|v| 
                if that.groups
                  colors.scale(that.groups[parent.index]).darker
                else
                  colors.scale(index).darker
                end
              
              
              }
              b.fill_style {|v| 
                if that.groups
                  colors.scale(that.groups[parent.index])
                else
                  colors.scale(index)
                end
              }
            end
            # Median
            bp.rule do |r|
              r.bottom {|v| y_scale[v[:median]]}
              r.width x_scale.range_band
              r.line_width 2
            end
            ##
            # Whiskers
            ##
            # Low whisker: horizontal cap, shown only when the whisker lies
            # below the first quartile
            bp.rule do |r|
              r.visible {|v| v[:percentil_25] > v[:low_whisker]}
              r.bottom {|v| y_scale[v[:low_whisker]]}              
            end
            
            # Low whisker: vertical stem from the whisker up to the box
            bp.rule do |r|
              r.visible {|v| v[:percentil_25] > v[:low_whisker]}
              r.bottom {|v| y_scale[v[:low_whisker]]}              
              r.left {|v| x_scale.range_band / 2.0}
              r.height {|v| y_scale.scale(v[:percentil_25]) - y_scale.scale(v[:low_whisker])}
            end
            # High whisker: horizontal cap

            bp.rule do |r|
              r.visible {|v| v[:percentil_75] < v[:high_whisker]}
              r.bottom {|v| y_scale.scale(v[:high_whisker])}              
            end
            
             # High whisker: vertical stem from the box up to the whisker
             bp.rule do |r|
              r.visible {|v| v[:percentil_75] < v[:high_whisker]}
              r.bottom {|v| y_scale.scale(v[:percentil_75])}              
              r.left {|v| x_scale.range_band / 2.0}
              r.height {|v| y_scale.scale(v[:high_whisker]) - y_scale.scale(v[:percentil_75])}
            end
            # Outliers
            bp.dot do |dot|
              dot.shape_size 4
              dot.data {|v| v[:outliers]}
              dot.left {|v| x_scale.range_band / 2.0}
              dot.bottom {|v| y_scale.scale(v)}
              dot.title {|v| v}
            end
          end
        end
        vis
      end
      
      # Returns SVG with the boxplot
      def to_svg
        # The panel has to be rendered before its SVG can be requested
        rubyvis_panel.tap(&:render).to_svg
      end
      # Adds a section named after the plot that embeds it as an SVG image.
      def report_building(builder) # :nodoc:
        builder.section(:name=>name) do |section|
          svg = to_svg
          section.image(svg, :type=>'svg', :width=>width, :height=>height)
        end
      end
    end
  end
end


================================================
FILE: lib/statsample/graph/histogram.rb
================================================
require 'rubyvis'
module Statsample
  module Graph
    
    # In statistics, a histogram is a graphical representation, showing a visual impression of the distribution of experimental data. It is an estimate of the probability distribution of a continuous variable and was first introduced by Karl Pearson [1]. A histogram consists of tabular frequencies, shown as adjacent rectangles, erected over discrete intervals (bins), with an area equal to the frequency of the observations in the interval. The height of a rectangle is also equal to the frequency density of the interval, i.e., the frequency divided by the width of the interval. The total area of the histogram is equal to the number of data.
    # 
    # == Usage
    # === Svg output
    #  a=[1,2,3,4].to_scale
    #  puts Statsample::Graph::Histogram.new(a).to_svg
    # === Using ReportBuilder
    #  a=[1,2,3,4].to_scale
    #  rb=ReportBuilder.new
    #  rb.add(Statsample::Graph::Histogram.new(a))
    #  rb.save_html('histogram.html')
    
    class Histogram
      include Summarizable
      # Histogram name
      attr_accessor :name
      # Total width
      attr_accessor :width
      # Total height
      attr_accessor :height
      # Top margin
      attr_accessor :margin_top
      # Bottom margin
      attr_accessor :margin_bottom
      # Left margin
      attr_accessor :margin_left
      # Right margin
      attr_accessor :margin_right
      attr_reader :hist
      # Could be an array of ranges or number of bins
      attr_accessor :bins
      # Minimum value on x axis. Calculated automaticly from data if not set
      attr_accessor :minimum_x
      # Maximum value on x axis. Calculated automaticly from data if not set
      attr_accessor :maximum_x
      # Minimum value on y axis. Set to 0 if not set
      attr_accessor :minimum_y
      # Maximum value on y axis. Calculated automaticly from data if not set.
      attr_accessor :maximum_y
      # Add a line showing normal distribution
      attr_accessor :line_normal_distribution
      # data could be a vector or a histogram
      def initialize(data, opts=Hash.new)
        # The default title includes the name of the data when it has one
        prov_name = data.respond_to?(:name) ? data.name : ""
        opts_default = {
          :name=>_("Histograma (%s)") % prov_name,
          :width=>400,
          :height=>300,
          :margin_top=>10,
          :margin_bottom=>20,
          :margin_left=>30,
          :margin_right=>20,
          :minimum_x=>nil,
          :maximum_x=>nil,
          :minimum_y=>nil,
          :maximum_y=>nil,
          :bins=>nil,
          :line_normal_distribution=>false
        }
        @opts = opts_default.merge(opts)
        # Only keys present in the defaults are assigned through a writer
        opts_default.keys.each do |key|
          send("#{key}=", @opts[key])
        end
        @data = data
      end
      # Prepares @hist, @mean and @sd from @data.
      # A Statsample::Histogram is used as is, with estimated mean and
      # standard deviation; a Statsample::Vector is binned (by default into
      # floor(sqrt(size)) bins). Any other kind of data is left untouched.
      def pre_vis # :nodoc:
        case @data
        when Statsample::Histogram
          @hist = @data
          @mean = @hist.estimated_mean
          @sd = @hist.estimated_standard_deviation
        when Statsample::Vector
          @mean = @data.mean
          @sd = @data.sd
          @bins ||= Math::sqrt(@data.size).floor
          @hist = @data.histogram(@bins)
        end
      end
      # Adds to +pan+ a line with the frequencies expected under a normal
      # distribution with the mean and standard deviation of the data.
      # One point is produced per bin-wide interval between @minimum_x and
      # @maximum_x, the width being taken from the first bin.
      def rubyvis_normal_distribution(pan)
        x_scale = @x_scale
        y_scale = @y_scale

        first_range = @hist.get_range(0)
        bin_width = first_range[1] - first_range[0]
        n_points = ((@maximum_x-@minimum_x) / bin_width.to_f).floor
        total = @hist.sum

        points = Array.new(n_points) do |i|
          lower = @minimum_x+i*bin_width
          upper = @minimum_x+(i+1)*bin_width
          upper_cdf = Distribution::Normal.cdf((upper-@mean) / @sd)
          lower_cdf = Distribution::Normal.cdf((lower-@mean) / @sd)
          # Probability mass of the interval scaled to the number of cases
          {:x=>(lower+upper) / 2.0, :y=>(upper_cdf - lower_cdf)*total}
        end

        pan.line do |line|
          line.data points
          line.interpolate "cardinal"
          line.stroke_style "black"
          line.bottom {|d| y_scale[d[:y]]}
          line.left {|d| x_scale[d[:x]]}
        end

      end
      # Returns a Rubyvis panel with the histogram
      def rubyvis_panel # :nodoc:
        # Builds and returns (without rendering) a Rubyvis::Panel with both
        # axes, one bar per bin and, when line_normal_distribution is set,
        # the normal distribution line.
        pre_vis
        #that=self
        
        # Axis limits not set by the caller are taken from the histogram and
        # stored in the instance variables.
        @minimum_x||=@hist.min
        @maximum_x||=@hist.max
        @minimum_y||=0
        @maximum_y||=@hist.max_val
        
        margin_hor=margin_left + margin_right
        margin_vert=margin_top  + margin_bottom
      
        x_scale = pv.Scale.linear(@minimum_x, @maximum_x).range(0, width - margin_hor)
      
        y_scale=Rubyvis::Scale.linear(@minimum_y, @maximum_y).range(0, height - margin_vert)
        
        y_scale.nice
        
        # One Hash per bin with its limits and its value
        bins=@hist.bins.times.map {|i|
          {
           :low =>@hist.get_range(i)[0],
           :high=>@hist.get_range(i)[1],
           :value=>@hist.bin[i]
          }
        }
        # Kept on the instance: rubyvis_normal_distribution reads them
        @x_scale=x_scale
        @y_scale=y_scale
        # cache data
        # NOTE(review): blocks written without a block parameter (pan.rule do
        # ... end) call mark methods such as data / bottom / left without a
        # receiver, so they presumably run in the context of the mark --
        # confirm against Rubyvis.
        vis=Rubyvis::Panel.new do |pan| 
          pan.width  width  - margin_hor
          pan.height height - margin_vert
          pan.bottom margin_bottom
          pan.left   margin_left
          pan.right  margin_right
          pan.top    margin_top
           # Y axis
          pan.rule do
            data y_scale.ticks
            bottom y_scale
            stroke_style {|d| d!=0 ? "#eee" : "#000"}
            label(:anchor=>'left') do
              text y_scale.tick_format
            end
          end
          # X axis
          pan.rule do
            data x_scale.ticks
            left x_scale
            stroke_style "black"
            height 5
            bottom(-5)
            label(:anchor=>'bottom') do
              text x_scale.tick_format
            end
          end
         
          # One bar per bin, spanning the limits of the bin on the x axis
          pan.bar do |bar|
            bar.data(bins)
            bar.left {|v| x_scale[v[:low]]}
            bar.width {|v| x_scale[v[:high]] - x_scale[v[:low]]}
            bar.bottom 0
            bar.height {|v| y_scale[v[:value]]}
            bar.stroke_style "black"
            bar.line_width 1
          end
           rubyvis_normal_distribution(pan) if @line_normal_distribution
        end
        vis
      end
      # Returns SVG with the histogram
      def to_svg
        # The panel has to be rendered before its SVG can be requested
        panel = rubyvis_panel
        panel.tap(&:render).to_svg
      end
      # Adds a section named after the plot that embeds it as an SVG image.
      def report_building(builder) # :nodoc:
        builder.section(:name=>name) do |section|
          svg = to_svg
          section.image(svg, :type=>'svg', :width=>width, :height=>height)
        end
      end
      # Writes a plain text histogram into +generator+: one line per bin
      # with its lower limit followed by a bar of asterisks.
      #
      # When the tallest bin holds more than 40 cases, every asterisk stands
      # for +step+ cases so that bars stay at most 40 characters wide.
      def report_building_text(generator)
        pre_vis
        #anchor=generator.toc_entry(_("Histogram %s") % [@name])
        max_val = @hist.max_val
        # Float division: with Integer counts, max_val / 40 would truncate
        # before ceil and leave the step too small.
        step = max_val > 40 ? (max_val / 40.0).ceil : 1

        @hist.range.each_with_index do |r,i|
          # range holds one more limit than there are bins; skip the last one
          next if i==@hist.bins
          generator.text(sprintf("%5.2f : %s", r, "*" * (@hist.bin[i] / step).floor ))
        end
      end
    end
  end
end


================================================
FILE: lib/statsample/graph/scatterplot.rb
================================================
require 'rubyvis'
module Statsample
  module Graph
    # = Scatterplot
    # 
    # From Wikipedia:
    # A scatter plot or scattergraph is a type of mathematical diagram using
    # Cartesian coordinates to display values for two variables for a set of data.
    # 
    # The data is displayed as a collection of points, each having the value of one variable determining the position on the horizontal axis and the value of the other variable determining the position on the vertical axis.[2] This kind of plot is also called a scatter chart, scatter diagram and scatter graph.
    # == Usage
    # === Svg output
    #  a=[1,2,3,4].to_scale
    #  b=[3,4,5,6].to_scale
    #  puts Statsample::Graph::Scatterplot.new(a,b).to_svg
    # === Using ReportBuilder
    #  a=[1,2,3,4].to_scale
    #  b=[3,4,5,6].to_scale
    #  rb=ReportBuilder.new
    #  rb.add(Statsample::Graph::Scatterplot.new(a,b))
    #  rb.save_html('scatter.html')
    
    class Scatterplot
      include Summarizable
      # Name of the graph; used as the section name on reports
      attr_accessor :name
      # Total width of Scatterplot
      attr_accessor :width
      # Total height of Scatterplot
      attr_accessor :height
      # Alpha of the dot stroke. The dot fill uses this value minus 0.3,
      # with a floor of 0.1
      attr_accessor :dot_alpha
      # Add a line on median of x and y axis 
      attr_accessor :line_median
      # Top margin
      attr_accessor :margin_top
      # Bottom margin
      attr_accessor :margin_bottom
      # Left margin
      attr_accessor :margin_left
      # Right margin
      attr_accessor :margin_right
      
      # Array of hashes {:x=>..., :y=>...}, one per valid pair of values
      attr_reader   :data
      # Vectors for X and Y axis, restricted to cases valid on both
      attr_reader :v1,:v2
      
      # Array with assignation of points to groups.
      # For example, for four points,
      #   scatterplot.groups=[1,2,1,3]
      # Assign same color to first and third point, and different to
      # second and fourth
      attr_accessor :groups

      
      # Rubyvis scales for each axis; set when rubyvis_panel is called
      attr_reader :x_scale, :y_scale
      # Minimum value on x axis. Calculated automatically from data if not set
      attr_accessor :minimum_x
      # Maximum value on x axis. Calculated automatically from data if not set
      attr_accessor :maximum_x
      # Minimum value on y axis. Calculated automatically from data if not set
      attr_accessor :minimum_y
      # Maximum value on y axis. Calculated automatically from data if not set.
      attr_accessor :maximum_y

      # Create a new Scatterplot.
      # Params:
      # * v1: Vector on X axis
      # * v2: Vector on Y axis
      # * opts: Hash of options. See attributes of Scatterplot
      def initialize(v1,v2,opts=Hash.new)
        @v1_name,@v2_name = v1.name,v2.name
        @v1,@v2           = Statsample.only_valid_clone(v1,v2)
        opts_default={
          :name=>_("Scatterplot (%s - %s)") % [@v1_name, @v2_name],
          :width=>400,
          :height=>300,
          :dot_alpha=>0.5,
          :line_median=>false,
          :margin_top=>10,
          :margin_bottom=>20,
          :margin_left=>20,
          :margin_right=>20,
          :minimum_x=>nil,
          :maximum_x=>nil,
          :minimum_y=>nil,
          :maximum_y=>nil,
          :groups=>nil
        }
        @opts=opts_default.merge(opts)
        # Only the known option keys are applied; unknown keys are ignored
        opts_default.keys.each {|k| send("#{k}=", @opts[k]) }
        @data=[]
        @v1.each_with_index {|d1,i|
          @data.push({:x=>d1, :y=>@v2[i]})
        }
      end
      # Add a rule on median of X and Y axis
      def add_line_median(vis) # :nodoc:
        that=self
        x=@x_scale
        y=@y_scale
        vis.execute {
          rule do
            data [that.v1.median]
            left x
            stroke_style Rubyvis.color("#933").alpha(0.5)
            label(:anchor=>"top") do
              text x.tick_format
            end
          end
          rule do
            data [that.v2.median]
            bottom y
            stroke_style Rubyvis.color("#933").alpha(0.5)
            label(:anchor=>"right") do
              text y.tick_format
            end
          end  
        }
        
      end
      # Returns a Rubyvis panel with scatterplot
      def rubyvis_panel # :nodoc:
        that=self
        
        # Axis limits default to the range of the data
        @minimum_x||=@v1.min
        @maximum_x||=@v1.max
        @minimum_y||=@v2.min
        @maximum_y||=@v2.max
        
        colors=Rubyvis::Colors.category10
        
        margin_hor=margin_left + margin_right
        margin_vert=margin_top  + margin_bottom
        
        x=Rubyvis::Scale.linear(@minimum_x, @maximum_x).range(0, width - margin_hor)
        y=Rubyvis::Scale.linear(@minimum_y, @maximum_y).range(0, height - margin_vert)
        @x_scale=x
        @y_scale=y
        vis=Rubyvis::Panel.new do |pan| 
          pan.width  width  - margin_hor
          pan.height height - margin_vert
          pan.bottom margin_bottom
          pan.left   margin_left
          pan.right  margin_right
          pan.top    margin_top
          # Rules positioned on the Y scale ticks, labeled on the left
          pan.rule do
            data y.ticks
            bottom y
            stroke_style {|d| d!=0 ? "#eee" : "#000"}
            label(:anchor=>'left') do
              visible {|d| d!=0 and  d < that.width}
              text y.tick_format
            end
          end
          
          # Rules positioned on the X scale ticks, labeled on the bottom
          pan.rule do
            data x.ticks
            left x
            stroke_style {|d| d!=0 ? "#eee" : "#000"}
            label(:anchor=>'bottom') do
              visible {|d| d>0 and d < that.height}
              text x.tick_format
            end
          end
          # Add lines on median
          add_line_median(pan) if line_median

          pan.panel do
            data(that.data)
            dot do
              left   {|d| x[d[:x]]}
              bottom {|d| y[d[:y]]}
              
              fill_style {|v| 
                alpha=(that.dot_alpha-0.3<=0) ? 0.1 : that.dot_alpha-0.3
                if that.groups
                  # Index groups by parent.index, same as stroke_style below,
                  # so fill and stroke of a dot always share the group color
                  colors.scale(that.groups[parent.index]).alpha(alpha)
                else
                  colors.scale(0).alpha(alpha)
                end
              }
              
              stroke_style {|v|
                if that.groups
                  colors.scale(that.groups[parent.index]).alpha(that.dot_alpha)
                else
                  colors.scale(0).alpha(that.dot_alpha)
                end
              }
              shape_radius 2
            end
          end
        end
        vis
      end
      # Returns SVG with scatterplot
      def to_svg
        rp=rubyvis_panel
        rp.render
        rp.to_svg
      end
      def report_building(builder) # :nodoc:
        builder.section(:name=>name) do |b|
          b.image(to_svg, :type=>'svg', :width=>width, :height=>height)
        end
        
      end
    end
  end
end


================================================
FILE: lib/statsample/graph.rb
================================================
require 'statsample/graph/scatterplot'
require 'statsample/graph/boxplot'
require 'statsample/graph/histogram'
module Statsample
  # Namespace for the graph classes, which are based on Rubyvis:
  # * Statsample::Graph::Boxplot
  # * Statsample::Graph::Histogram
  # * Statsample::Graph::Scatterplot
  module Graph
  end
end


================================================
FILE: lib/statsample/histogram.rb
================================================
module Statsample
  # A histogram consists of a set of bins which count the 
  # number of events falling into a given range of a continuous variable x. 
  # 
  # This implementation follows the conventions of GSL
  # for its specification.
  # 
  #  * Verbatim: *
  #
  #  The range for bin[i] is given by range[i] to range[i+1]. 
  #  For n bins there are n+1 entries in the array range. 
  #  Each bin is inclusive at the lower end and exclusive at the upper end. 
  #  Mathematically this means that the bins are defined 
  #  by the following inequality,
  # 
  #   bin[i] corresponds to range[i] <= x < range[i+1]
  # 
  #  Here is a diagram of the correspondence between ranges and bins
  #  on the number-line for x,
  # 
  # 
  #      [ bin[0] )[ bin[1] )[ bin[2] )[ bin[3] )[ bin[4] )
  #   ---|---------|---------|---------|---------|---------|---  x
  #    r[0]      r[1]      r[2]      r[3]      r[4]      r[5]
  # 
  # 
  #  In this picture the values of the range array are denoted by r. 
  #  On the left-hand side of each bin the square bracket ‘[’ denotes 
  #  an inclusive lower bound ( r <= x), and the round parentheses ‘)’ 
  #  on the right-hand side denote an exclusive upper bound (x < r). 
  #  Thus any samples which fall on the upper end of the histogram are 
  #  excluded. 
  #  If you want to include this value for the last bin you will need to 
  #  add an extra bin to your histogram. 
  #
  #
  # == Reference:
  # * http://www.gnu.org/software/gsl/manual/html_node/The-histogram-struct.html
  
    class Histogram
      include Enumerable
      class << self
        # Alloc +n_bins+, using +range+ as ranges of bins
        def alloc(n_bins, range=nil, opts=Hash.new)
          Histogram.new(n_bins, range, opts)
          
        end
        # Alloc +n_bins+ bins, using +p1+ as minimum and +p2+
        # as maximum. +p1+ could also be an array [minimum, maximum]
        def alloc_uniform(n_bins, p1=nil,p2=nil)
          if p1.is_a? Array
            min,max=p1
          else
            min,max=p1,p2
          end
          range=max - min
          step=range / n_bins.to_f
          range=(n_bins+1).times.map {|i| min + (step*i)}
          Histogram.new(range)
        end
      end
      attr_accessor :name
      # Array with the value (count or accumulated weight) of each bin
      attr_reader :bin
      # Array with the n_bins+1 limits of the bins
      attr_reader :range
      include GetText
      bindtextdomain("statsample")
      # Create a new histogram
      # * p1: Array with the limits of the bins, or Integer with the
      #   number of bins
      # * min_max: optional array [minimum, maximum], used to create
      #   bins of uniform size
      # * opts: Hash of options; each key with a writer on the object
      #   (like :name) is assigned
      #
      # Raises ArgumentError if p1 is neither an Array nor an Integer
      def initialize(p1, min_max=false, opts=Hash.new)
        
        if p1.is_a? Array
          range=p1
          @n_bins=p1.size-1
        elsif p1.is_a? Integer
          @n_bins=p1
        else
          raise ArgumentError, "p1 should be an Array of ranges or an Integer number of bins"
        end
        
        @bin=[0.0]*(@n_bins)
        if(min_max)
          min, max=min_max[0], min_max[1]
          range=Array.new(@n_bins+1)
          (@n_bins+1).times {|i| range[i]=min+(i*(max-min).quo(@n_bins)) }
        end
        range||=[0.0]*(@n_bins+1)
        set_ranges(range)
        @name=""
        opts.each{|k,v|
          # Check for the writer, not the reader: read-only attributes like
          # :bin respond to k but have no "k=" method
          setter="#{k}="
          self.send(setter,v) if self.respond_to? setter
        }
      end
      # Number of bins
      def bins
        @n_bins
      end
      # Add +w+ to the bin whose range contains +x+.
      # If +x+ responds to each, every element is incremented.
      # Values outside all ranges, and non numeric values, are ignored.
      def increment(x, w=1)
        if x.respond_to? :each
          x.each{|y| increment(y,w) }
        elsif x.is_a? Numeric
          (range.size-1).times do |i|
            if x>=range[i] and x<range[i+1]
              @bin[i]+=w
              break
            end
          end
        end
      end
      # Set the limits of the bins. Should have one more element
      # than the number of bins
      def set_ranges(range)
        raise "Range size should be bin+1" if range.size!=@bin.size+1
        @range=range
      end
      # Array [lower, upper] with the limits of bin +i+
      def get_range(i)
        [@range[i],@range[i+1]]
      end
      # Upper limit of the last bin
      def max
        @range.last
      end
      # Lower limit of the first bin
      def min
        @range.first
      end
      # Maximum value stored on a bin
      def max_val
        @bin.max
      end
      # Minimum value stored on a bin
      def min_val
        @bin.min
      end
      # Yields, for each bin, a hash with keys :i (index), :low, :high,
      # :middle (midpoint of the range) and :value
      def each
        bins.times.each do |i|
          r=get_range(i)
          arg={:i=>i, :low=>r[0],:high=>r[1], :middle=>(r[0]+r[1]) / 2.0,  :value=>@bin[i]}
          yield arg
        end
      end
      # Variance estimated from bin midpoints weighted by bin values,
      # using n-1 on the denominator
      def estimated_variance
        sum,n=0,0
        mean=estimated_mean
        each do |v|
          sum+=v[:value]*(v[:middle]-mean)**2
          n+=v[:value]
        end
        sum / (n-1)
      end
      def estimated_standard_deviation
        Math::sqrt(estimated_variance)
      end
      # Mean estimated from bin midpoints weighted by bin values
      def estimated_mean
        sum,n=0,0
        each do |v|
          sum+= v[:value]* v[:middle]
          n+=v[:value]
        end
        sum / n
      end
      alias :mean :estimated_mean
      alias :sigma :estimated_standard_deviation
      
      # Sum of bin values from bin +start+ to bin +_end+, both inclusive.
      # By default, sums all bins
      def sum(start=nil,_end=nil)
        start||=0
        _end||=@n_bins-1
        (start.._end).inject(0) {|ac,i| ac+@bin[i]}
      end
      def report_building(generator)
        hg=Statsample::Graph::Histogram.new(self)
        generator.parse_element(hg)
      end
      def report_building_text(generator)
        @range.each_with_index do |r,i|
          # range has one more element than bin; skip the last limit
          next if i==@bin.size
          generator.text(sprintf("%5.2f : %d", r, @bin[i]))
        end
      end
    end
end


================================================
FILE: lib/statsample/matrix.rb
================================================
class ::Vector
  # Single-column matrix holding the elements of this vector.
  def to_matrix
    elements = to_a
    ::Matrix.columns([elements])
  end

  # A vector is already a vector: returns the receiver itself.
  def to_vector
    self
  end
end
class ::Matrix
  # A matrix is already a matrix: returns the receiver itself
  def to_matrix
    self
  end
  # Converts the matrix to a Statsample::Dataset, one scale vector per
  # column. Field names are taken from fields_y when available;
  # otherwise they are generated as VAR_1, VAR_2, ...
  def to_dataset
    f = (self.respond_to? :fields_y) ? fields_y : column_size.times.map {|i| _("VAR_%d") % (i+1) }
    ds=Statsample::Dataset.new(f)
    f.each do |ff|
      ds[ff].type=:scale
      ds[ff].name=ff
    end
    row_size.times {|i|
      ds.add_case_array(self.row(i).to_a)
    }
    ds.update_valid_data
    ds.name=self.name if self.respond_to? :name
    ds
  end
  # Keep the existing eigenpairs implementation reachable under
  # eigenpairs_ruby. `defined? :eigenpairs` tested a Symbol literal, which
  # is always truthy; ask the class whether the method really exists.
  if method_defined?(:eigenpairs)
    alias_method :eigenpairs_ruby, :eigenpairs
  end
  
  if Statsample.has_gsl?
    # Optimize eigenpairs of extendmatrix module using gsl
    def eigenpairs
      to_gsl.eigenpairs
    end
  end
  
  # First element of each eigenpair
  def eigenvalues
    eigenpairs.collect {|v| v[0]}
  end
  # Second element of each eigenpair
  def eigenvectors
    eigenpairs.collect {|v| v[1]}
  end
  # Matrix with eigenvectors as columns
  def eigenvectors_matrix
    Matrix.columns(eigenvectors)
  end
  
  
  # Builds a GSL::Matrix with the same rows as this matrix
  def to_gsl
    out=[]
    self.row_size.times{|i|
      out[i]=self.row(i).to_a
    }
    GSL::Matrix[*out]
  end
end

module GSL
  class Vector
    class Col
      # Single-column ::Matrix with the elements of this column vector.
      def to_matrix
        values = (0...size).map { |idx| self[idx] }
        ::Matrix.columns([values])
      end

      # Implicit array conversion; same result as to_a.
      def to_ary
        to_a
      end

      # Already a GSL object: returns the receiver itself.
      def to_gsl
        self
      end
    end
  end
  class Matrix
    # Already a GSL object: returns the receiver itself.
    def to_gsl
      self
    end

    # Converts the matrix to a Statsample::Dataset, one scale vector per
    # column. Field names come from fields_y when the matrix responds to
    # it; otherwise they are generated as VAR_1, VAR_2, ...
    def to_dataset
      names = if respond_to?(:fields_y)
        fields_y
      else
        (0...column_size).map { |idx| _("VAR_%d") % (idx + 1) }
      end
      dataset = Statsample::Dataset.new(names)
      names.each do |field|
        dataset[field].type = :scale
        dataset[field].name = field
      end
      (0...row_size).each do |idx|
        dataset.add_case_array(row(idx).to_a)
      end
      dataset.update_valid_data
      dataset.name = name if respond_to?(:name)
      dataset
    end

    # Number of rows (size1).
    def row_size
      size1
    end

    # Number of columns (size2).
    def column_size
      size2
    end

    # Alias-style wrapper around det.
    def determinant
      det
    end

    # Inverse computed with GSL::Linalg::LU.invert.
    def inverse
      GSL::Linalg::LU.invert(self)
    end

    # First element of each eigenpair.
    def eigenvalues
      eigenpairs.map { |pair| pair[0] }
    end

    # Second element of each eigenpair.
    def eigenvectors
      eigenpairs.map { |pair| pair[1] }
    end

    # Matrix sum of squares
    def mssq
      total = 0
      to_v.each { |value| total += value**2 }
      total
    end

    # Eigenvector matrix from GSL::Eigen.symmv, sorted by descending
    # eigenvalue.
    def eigenvectors_matrix
      values, vectors = GSL::Eigen.symmv(self)
      GSL::Eigen::symmv_sort(values, vectors, GSL::Eigen::SORT_VAL_DESC)
      vectors
    end

    # Array of [eigenvalue, eigenvector column] pairs, sorted by
    # descending eigenvalue.
    def eigenpairs
      values, vectors = GSL::Eigen.symmv(self)
      GSL::Eigen::symmv_sort(values, vectors, GSL::Eigen::SORT_VAL_DESC)
      @eigenpairs = (0...values.size).map do |idx|
        [values[idx], vectors.get_col(idx)]
      end
    end

    #def eigenpairs_ruby
    #  self.to_matrix.eigenpairs_ruby
    #end

    # True when the matrix has as many rows as columns.
    def square?
      size1 == size2
    end

    # Converts to a ::Matrix by copying every cell.
    def to_matrix
      n_rows = size1
      n_cols = size2
      cells = (0...n_rows).map do |i|
        (0...n_cols).map { |j| self[i, j] }
      end
      ::Matrix.rows(cells)
    end

    # Sum of all cells of the matrix.
    def total_sum
      total = 0
      (0...size1).each do |i|
        (0...size2).each do |j|
          total += self[i, j]
        end
      end
      total
    end
  end
end

module Statsample
  # Module to add names to X and Y fields
  module NamedMatrix
    include Summarizable

    # Field names of a square matrix (the row names).
    # Raises if the matrix is not square.
    def fields
      raise "Should be square" unless square?
      fields_x
    end

    # Sets the same names for rows and columns of a square matrix.
    # Raises if the matrix is not square.
    def fields=(v)
      raise "Matrix should be square" unless square?
      @fields_x = v
      @fields_y = v
    end

    # Sets the row names; there should be one name per row.
    def fields_x=(v)
      raise "Size of fields != row_size" unless v.size == row_size
      @fields_x = v
    end

    # Sets the column names; there should be one name per column.
    def fields_y=(v)
      raise "Size of fields != column_size" unless v.size == column_size
      @fields_y = v
    end

    # Row names. Generated as X0, X1, ... if not set.
    def fields_x
      @fields_x ||= Array.new(row_size) { |idx| _("X%d") % idx }
    end

    # Column names. Generated as Y0, Y1, ... if not set.
    def fields_y
      @fields_y ||= Array.new(column_size) { |idx| _("Y%d") % idx }
    end

    # Name of the matrix. Generated with get_new_name if not set.
    def name
      @name ||= get_new_name
    end

    def name=(v)
      @name = v
    end

    # Returns a new default name, based on a counter incremented on
    # every call.
    def get_new_name
      @@named_matrix ||= 0
      @@named_matrix += 1
      _("Matrix %d") % @@named_matrix
    end

  end
  # Module to add method for variance/covariance and correlation matrices
  # == Usage
  #  matrix=Matrix[[1,2],[2,3]]
  #  matrix.extend CovariateMatrix
  # 
  module CovariateMatrix
    include NamedMatrix
    @@covariatematrix=0

    # Get type of covariate matrix. Could be :covariance or :correlation.
    # A square matrix is a correlation matrix only if every element of
    # the diagonal is 1.0. For non square matrices, returns the type
    # assigned with _type=
    def _type
      if row_size==column_size
        if row_size.times.find {|i| self[i,i]!=1.0}
          :covariance
        else
          :correlation
        end
      else
        @type
      end
      
    end
    def _type=(t)
      @type=t
    end
    # Returns the correlation matrix derived from a covariance matrix,
    # dividing each covariance by the product of the standard deviations.
    # If the matrix isn't a covariance matrix, returns self
    def correlation
      if(_type==:covariance)
        matrix=Matrix.rows(row_size.times.collect { |i|
          column_size.times.collect { |j|
            if i==j
              1.0
            else
              self[i,j].quo(Math::sqrt(self[i,i])*Math::sqrt(self[j,j]))
            end
          }
        })
        matrix.extend CovariateMatrix 
        matrix.fields_x=fields_x
        matrix.fields_y=fields_y
        matrix._type=:correlation
        matrix
      else
        self
      end
    end
    
    
    # Get variance for field k
    # 
    def variance(k)
      submatrix([k])[0,0]
    end
    
    # Returns a new default name, based on a counter incremented on
    # every call
    def get_new_name
      @@covariatematrix+=1
      _("Covariate matrix %d") % @@covariatematrix
    end
    
    # Select a submatrix of factors. If you have a correlation matrix
    # with a, b and c, you could obtain a submatrix of correlations of
    # a and b, b and c or a and b
    #
    # You could use labels or index to select the factors.
    # If you don't specify columns, its will be equal to rows.
    #
    # Example:
    #   a=Matrix[[1.0, 0.3, 0.2],
    #            [0.3, 1.0, 0.5], 
    #            [0.2, 0.5, 1.0]]
    #   a.extend CovariateMatrix
    #   a.fields=%w{a b c}
    #   a.submatrix(%w{c a}, %w{b})
    #   => Matrix[[0.5],[0.3]]
    #   a.submatrix(%w{c a})
    #   => Matrix[[1.0, 0.2] , [0.2, 1.0]]
    def submatrix(rows,columns=nil)
      raise ArgumentError, "rows shouldn't be empty" if rows.respond_to? :size and rows.size==0
      columns||=rows
      # Convert all fields on index
      row_index=rows.collect {|v| 
        r=v.is_a?(Numeric) ? v : fields_x.index(v)
        raise "Index #{v} doesn't exists on matrix" if r.nil?
        r
      }
      column_index=columns.collect {|v| 
        r=v.is_a?(Numeric) ? v : fields_y.index(v)
        raise "Index #{v} doesn't exists on matrix" if r.nil?
        r
      }
      
      
      fx=row_index.collect {|v| fields_x[v]}
      fy=column_index.collect {|v| fields_y[v]}
        
      matrix= Matrix.rows(row_index.collect {|i|
        column_index.collect {|j| self[i,j]}})
      matrix.extend CovariateMatrix 
      matrix.fields_x=fx
      matrix.fields_y=fy
      matrix._type=_type
      matrix
    end
    # Adds a table with the matrix to the report. Values are shown with
    # three decimals and without the leading zero (.300, -.300)
    def report_building(generator)
      @name||= (_type==:correlation ? _("Correlation"):_("Covariance"))+_(" Matrix")
      generator.table(:name=>@name, :header=>[""]+fields_y) do |t|
        row_size.times {|i|
          t.row([fields_x[i]]+row(i).to_a.collect {|i1|
              # Strip only a leading zero. Replacing every "0." would turn
              # values like 10.500 into 1.500
              i1.nil? ? "--" : sprintf("%0.3f",i1).sub(/\A(-?)0\./, '\1.')
          })
        }
      end
    end
  end
end


================================================
FILE: lib/statsample/multiset.rb
================================================
module Statsample
  # Multiset joins multiple dataset with the same fields and vectors
  # but with different number of cases. 
  # This is the base class for stratified and cluster sampling estimation
  class Multiset
    # Name of fields
    attr_reader :fields
    # Hash of Statsample::Dataset objects, indexed by the key given on
    # add_dataset
    attr_reader :datasets

    # To create a multiset
    # * Multiset.new(%w{f1 f2 f3}) # define only fields
    def initialize(fields)
      @fields = fields
      @datasets = {}
    end

    # Creates a multiset with one empty dataset for each name
    # on +ds_names+
    def self.new_empty_vectors(fields,ds_names)
      multiset = Multiset.new(fields)
      ds_names.each do |ds_name|
        multiset.add_dataset(ds_name, Dataset.new(fields))
      end
      multiset
    end

    # Generate a new dataset as a union of partial dataset
    # If block given, this is applied to a copy of each dataset
    # before union
    def union(&block)
      merged = {}
      types  = {}
      names  = {}
      labels = {}
      each do |key, ds|
        if block
          ds = ds.dup
          block.call(key, ds)
        end
        @fields.each do |f|
          merged[f] ||= Array.new
          merged[f].concat(ds[f].data)
          # Type, name and labels come from the first dataset visited
          types[f]  ||= ds[f].type
          names[f]  ||= ds[f].name
          labels[f] ||= ds[f].labels
        end
      end

      @fields.each do |f|
        vector = merged[f].to_vector(types[f])
        vector.name = names[f]
        vector.labels = labels[f]
        merged[f] = vector
      end
      ds_union = merged.to_dataset
      ds_union.fields = @fields
      ds_union
    end

    # Sorted keys of the datasets
    def datasets_names
      @datasets.keys.sort
    end

    # Number of datasets
    def n_datasets
      @datasets.size
    end

    # Adds dataset +ds+ under +key+. Raises ArgumentError if the fields
    # of the dataset are different from the fields of the multiset
    def add_dataset(key,ds)
      unless ds.fields == @fields
        raise ArgumentError, "Dataset(#{ds.fields.to_s})must have the same fields of the Multiset(#{@fields})"
      end
      @datasets[key] = ds
    end

    # Sums the values returned by the block, which receives the key of
    # each dataset and its vector for +field+
    def sum_field(field)
      @datasets.inject(0) do |acc, (stratum_name, ds)|
        acc + yield(stratum_name, ds[field])
      end
    end

    # Array with the values returned by the block, which receives the
    # key of each dataset and its vector for +field+
    def collect_vector(field)
      @datasets.collect { |key, ds| yield key, ds[field] }
    end

    # Yields the key of each dataset and its vector for +field+
    def each_vector(field)
      @datasets.each { |key, ds| yield key, ds[field] }
    end

    # Dataset stored under key +i+
    def[](i)
      @datasets[i]
    end

    # Calls the block with key and dataset, skipping datasets
    # without cases
    def each(&block)
      @datasets.each do |key, ds|
        block.call(key, ds) unless ds.cases == 0
      end
    end
  end
  class StratifiedSample
    # The class methods receive +es+, an array with one hash per stratum.
    # Keys used: 'N' (stratum size), 'n' (sample size of stratum),
    # 's' (standard deviation) and 'p' (proportion).
    class << self
      # mean for an array of vectors
      def mean(*vectors)
        n_total=0
        means=vectors.inject(0){|a,v|
          n_total+=v.size
          a+v.sum
        }
        means.to_f/n_total
      end
      
      def standard_error_ksd_wr(es)
        n_total=0
        sum=es.inject(0){|a,h|
            n_total+=h['N']
            a+((h['N']**2 * h['s']**2) / h['n'].to_f)
        }
        (1.to_f / n_total)*Math::sqrt(sum)
      end
      
      
      def variance_ksd_wr(es)
        standard_error_ksd_wr(es)**2
      end
      # Sum of the sizes ('N') of all strata
      def calculate_n_total(es)
        es.inject(0) {|a,h| a+h['N'] }
      end
      # Source : Cochran (1972)
      
      def variance_ksd_wor(es)
        n_total=calculate_n_total(es)
        es.inject(0){|a,h|
          val=((h['N'].to_f / n_total)**2) * (h['s']**2 / h['n'].to_f) * (1 - (h['n'].to_f / h['N']))
          a+val
        }
      end
      def standard_error_ksd_wor(es)
        Math::sqrt(variance_ksd_wor(es))
      end
      
      
      def variance_esd_wor(es)
        n_total=calculate_n_total(es)
        sum=es.inject(0){|a,h|
          val=h['N']*(h['N']-h['n'])*(h['s']**2 / h['n'].to_f)
          a+val
        }
        (1.0/(n_total**2))*sum
      end
      
      
      # Square root of variance_esd_wor. That expression is algebraically
      # equal to variance_ksd_wor, which was used here before
      def standard_error_esd_wor(es)
        Math::sqrt(variance_esd_wor(es))
      end
      # Based on http://stattrek.com/Lesson6/STRAnalysis.aspx
      def variance_esd_wr(es)
        n_total=calculate_n_total(es)
        sum=es.inject(0){|a,h|
          val= ((h['s']**2 * h['N']**2) / h['n'].to_f)
          a+val
        }
        (1.0/(n_total**2))*sum
      end
      def standard_error_esd_wr(es)
        Math::sqrt(variance_esd_wr(es))
      end
      
      def proportion_variance_ksd_wor(es)
        n_total=calculate_n_total(es)
        es.inject(0){|a,h|
          val= (((h['N'].to_f / n_total)**2 * h['p']*(1-h['p'])) / (h['n'])) * (1- (h['n'].to_f / h['N']))
          a+val
        }
      end
      def proportion_sd_ksd_wor(es)
        Math::sqrt(proportion_variance_ksd_wor(es))
      end
      
      
      def proportion_sd_ksd_wr(es)
        n_total=calculate_n_total(es)
        sum=es.inject(0){|a,h|
          val= (h['N']**2 * h['p']*(1-h['p'])) / h['n'].to_f
          a+val
        }
        Math::sqrt(sum) * (1.0/n_total)
      end
      # Variance is the square of the standard deviation with replacement,
      # in the same way as variance_ksd_wr
      def proportion_variance_ksd_wr(es)
        proportion_sd_ksd_wr(es)**2
      end
      
      # Same expression as StratifiedSample#variance_pst:
      # (1/N^2) * sum of N_h^2 * (N_h - n_h) * p_h * (1 - p_h) /
      # ((n_h - 1) * (N_h - 1))
      def proportion_variance_esd_wor(es)
        n_total=calculate_n_total(es)
        
        sum=es.inject(0){|a,h|
          val=(h['N']**2 * (h['N']-h['n']) * h['p']*(1.0-h['p'])) / ((h['n']-1)*(h['N']-1))
          a+val
        }
        (1.0/n_total**2)*sum
      end
      # NOTE(review): this uses the ksd (known sd) variance, not
      # proportion_variance_esd_wor. Kept as is because changing it would
      # change the results of a working method; confirm the intended formula
      def proportion_sd_esd_wor(es)
        Math::sqrt(proportion_variance_ksd_wor(es))
      end
    end
    
    # * ms: a Statsample::Multiset, with one dataset for each stratum
    # * strata_sizes: Hash with the population size of each stratum,
    #   using the same keys as the datasets of the multiset
    def initialize(ms,strata_sizes)
      raise TypeError,"ms should be a Multiset" unless ms.is_a? Statsample::Multiset
      @ms=ms
      raise ArgumentError,"You should put a strata size for each dataset" if strata_sizes.keys.sort!=ms.datasets_names
      @strata_sizes=strata_sizes
      @population_size=@strata_sizes.inject(0) {|a,x| a+x[1]}
      @strata_number=@ms.n_datasets
      @sample_size=@ms.datasets.inject(0) {|a,x| a+x[1].cases}
    end
    # Number of strata
    def strata_number
      @strata_number
    end
    # Population size. Equal to sum of strata sizes
    # Symbol: N<sub>h</sub>
    def population_size
      @population_size
    end
    # Sample size. Equal to sum of sample of each stratum
    def sample_size
      @sample_size
    end
    # Size of stratum x
    def stratum_size(h)
      @strata_sizes[h]
    end
    # Array with the vector for +field+ of each dataset
    def vectors_by_field(field)
      @ms.datasets.collect{|k,ds|
        ds[field]
      }
    end
    # Population proportion based on strata
    def proportion(field, v=1)
      @ms.sum_field(field) {|s_name,vector|
        stratum_ponderation(s_name)*vector.proportion(v)
      }
    end
    # Stratum ponderation.
    # Symbol: W\<sub>h\</sub>
    def stratum_ponderation(h)
      @strata_sizes[h].to_f / @population_size
    end
    alias_method :wh, :stratum_ponderation
    
    # Population mean based on strata
    def mean(field)
      @ms.sum_field(field) {|s_name,vector|
        stratum_ponderation(s_name)*vector.mean
      }
    end
    # Standard error with estimated population variance and without replacement.
    # Source: Cochran (1972)
    def standard_error_wor(field)
      es=@ms.collect_vector(field) {|s_n, vector|
        {'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
      }
      
      StratifiedSample.standard_error_esd_wor(es)
    end
    
    # Standard error with estimated population variance and without replacement.
    # Source: http://stattrek.com/Lesson6/STRAnalysis.aspx
    
    def standard_error_wor_2(field)
      sum=@ms.sum_field(field) {|s_name,vector|
        s_size=@strata_sizes[s_name]
        (s_size**2 * (1-(vector.size.to_f / s_size)) * vector.variance_sample / vector.size.to_f)
      }
      (1/@population_size.to_f)*Math::sqrt(sum)
    end
    
    # Standard error with estimated population variance and with replacement
    def standard_error_wr(field)
      es=@ms.collect_vector(field) {|s_n, vector|
        {'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
      }
      
      StratifiedSample.standard_error_esd_wr(es)
    end
    def proportion_sd_esd_wor(field,v=1)
      es=@ms.collect_vector(field) {|s_n, vector|
        {'N'=>@strata_sizes[s_n],'n'=>vector.size, 'p'=>vector.proportion(v)}
      }
      
      StratifiedSample.proportion_sd_esd_wor(es)
    end
    
    def proportion_standard_error(field,v=1)
      prop=proportion(field,v)
      sum=@ms.sum_field(field) {|s_name,vector|
        nh=vector.size
        s_size=@strata_sizes[s_name]
        # Float division: with integer sizes, nh / s_size truncates to 0
        # and the finite population correction is lost
        (s_size**2 * (1-(nh.to_f / s_size)) * prop * (1-prop) / (nh - 1 ))
      }
      (1.quo(@population_size)) * Math::sqrt(sum)
    end
    # Cochran(1971), p. 150 
    def variance_pst(field,v=1)
      sum=@ms.datasets.inject(0) {|a,da|
        stratum_name=da[0]
        ds=da[1]
        nh=ds.cases.to_f
        s_size=@strata_sizes[stratum_name]
        prop=ds[field].proportion(v)
        a + (((s_size**2 * (s_size-nh)) / (s_size-1))*(prop*(1-prop) / (nh-1)))
      }
      (1/@population_size.to_f ** 2)*sum
    end
  end
end


================================================
FILE: lib/statsample/regression/multiple/alglibengine.rb
================================================
if HAS_ALGIB
module Statsample
module Regression
module Multiple
# Class for Multiple Regression Analysis
# Requires Alglib gem and uses a listwise aproach.
# Faster than GslEngine on massive prediction use, because process is c-based.
# Prefer GslEngine if you need good memory use.
# If you need pairwise, use RubyEngine
# Example:
#
#   @a=[1,3,2,4,3,5,4,6,5,7].to_vector(:scale)
#   @b=[3,3,4,4,5,5,6,6,4,4].to_vector(:scale)
#   @c=[11,22,30,40,50,65,78,79,99,100].to_vector(:scale)
#   @y=[3,4,5,6,7,8,9,10,20,30].to_vector(:scale)
#   ds={'a'=>@a,'b'=>@b,'c'=>@c,'y'=>@y}.to_dataset
#   lr=Statsample::Regression::Multiple::AlglibEngine.new(ds,'y')
#            
class AlglibEngine < BaseEngine
  # Builds the regression of +y_var+ over the rest of the fields of +ds+,
  # using only the cases valid on all fields.
  def initialize(ds,y_var, opts=Hash.new)
    super
    @ds = ds.dup_only_valid
    @ds_valid = @ds
    @dy = @ds[@y_var]
    @ds_indep = ds.dup(ds.fields-[y_var])
    # Create a custom matrix: predictors first, dependent variable last
    @fields = @ds.fields.reject { |f| f == @y_var }
    predictor_columns = @fields.map { |f| @ds[f].to_a }
    @dep_columns = predictor_columns.dup
    matrix = Matrix.columns(predictor_columns + [@ds[@y_var]])
    @lr_s = nil
    @lr = ::Alglib::LinearRegression.build_from_matrix(matrix)
    @coeffs = assign_names(@lr.coeffs)
  end

  # Marshal support: only the dataset and the name of the dependent
  # variable are dumped.
  def _dump(i)
    Marshal.dump({'ds'=>@ds,'y_var'=>@y_var})
  end

  # Marshal support: rebuilds the engine from the dumped dataset.
  def self._load(data)
    dumped = Marshal.load(data)
    new(dumped['ds'], dumped['y_var'])
  end

  # Coefficients with names assigned
  def coeffs
    @coeffs
  end

  # Coefficients using a constant
  # Based on http://www.xycoon.com/ols1.htm
  def matrix_resolution
    mse_p = mse
    columns = @dep_columns.dup.map { |column| column.map { |value| value.to_f } }
    # First column of ones, for the constant
    columns.unshift([1.0]*@ds.cases)
    y = Matrix.columns([@dy.data.map { |value| value.to_f }])
    x = Matrix.columns(columns)
    xt = x.t
    (((xt*x)).inverse*xt) * y
  end

  def r2
    r**2
  end

  # Pearson correlation between the dependent variable and
  # the predicted values
  def r
    Bivariate::pearson(@dy,predicted)
  end

  def sst
    @dy.ss
  end

  def constant
    @lr.constant
  end

  # Coefficients of the regression on the standarized dataset
  def standarized_coeffs
    assign_names(lr_s.coeffs)
  end

  # Regression on the standarized dataset, built on first use
  def lr_s
    build_standarized if @lr_s.nil?
    @lr_s
  end

  def build_standarized
    @ds_s = @ds.standarize
    columns = []
    @ds_s.fields.each do |f|
      columns.push(@ds_s[f].to_a) unless f == @y_var
    end
    @dep_columns_s = columns.dup
    columns.push(@ds_s[@y_var])
    @lr_s = Alglib::LinearRegression.build_from_matrix(Matrix.columns(columns))
  end

  def process(v)
    @lr.process(v)
  end

  def process_s(v)
    lr_s.process(v)
  end

  # ???? Not equal to SPSS output
  def standarized_residuals
    res = residuals
    red_sd = residuals.sds
    res.collect { |value| value.quo(red_sd) }.to_vector(:scale)
  end
end
end
end
end # for Statsample
end # for if

        
================================================
FILE: lib/statsample/regression/multiple/baseengine.rb
================================================
module Statsample
  module Regression
    module Multiple
      # Base class for Multiple Regression Engines
      class BaseEngine
        include Statsample::Summarizable
        # Name of analysis
        attr_accessor :name
        # Minimum number of valid cases for pairs of correlation
        attr_reader :cases
        # Number of valid cases (listwise)
        attr_reader :valid_cases
        # Number of total cases (dataset.cases)
        attr_reader :total_cases
        # Number of decimals used to format figures on the report
        attr_accessor :digits

        def self.univariate?
          true
        end

        # Stores the dataset and the dependent variable name, and applies options.
        # * ds: dataset; +fields+ and +cases+ are read from it.
        # * y_var: name of the dependent variable.
        # * opts: hash of options (default :digits=>3). Every key for which
        #   the object has a public writer is assigned; other keys are ignored.
        def initialize(ds, y_var, opts = Hash.new)
          @ds=ds
          @predictors_n=@ds.fields.size-1
          @total_cases=@ds.cases
          @cases=@ds.cases
          @y_var=y_var
          @r2=nil
          @name=_("Multiple Regression:  %s over %s") % [ ds.fields.join(",") , @y_var]

          @opts={:digits=>3}.merge(opts)
          # Test for the writer itself: a key that only has a reader
          # (e.g. :cases) must not end in a NoMethodError.
          @opts.each do |key, value|
            writer="#{key}="
            send(writer, value) if respond_to?(writer)
          end
        end

        # Calculate F Test
        def anova
          @anova||=Statsample::Anova::OneWay.new(:ss_num=>ssr, :ss_den=>sse, :df_num=>df_r, :df_den=>df_e, :name_numerator=>_("Regression"), :name_denominator=>_("Error"), :name=>"ANOVA")
        end

        # Standard error of estimate
        def se_estimate
          Math::sqrt(sse.quo(df_e))
        end

        # Retrieves a vector with predicted values for y.
        # A case with a nil on any predictor gets nil.
        def predicted
          @total_cases.times.collect { |i|
            values=@dep_columns.collect { |column| column[i] }
            values.any? { |value| value.nil? } ? nil : process(values)
          }.to_vector(:scale)
        end

        # Retrieves a vector with standarized values for y
        def standarized_predicted
          predicted.standarized
        end

        # Retrieves a vector with residuals values for y.
        # A case with a nil on any predictor or on y gets nil.
        def residuals
          (0...@total_cases).collect { |i|
            values=@dep_columns.collect { |column| column[i] }
            if values.any? { |value| value.nil? } or @ds[@y_var][i].nil?
              nil
            else
              @ds[@y_var][i] - process(values)
            end
          }.to_vector(:scale)
        end

        # R Multiple
        def r
          raise "You should implement this"
        end

        # Sum of squares Total
        def sst
          raise "You should implement this"
        end

        # R^2 Adjusted.
        # Estimate Population R^2 using Ezequiel formula.
        # Always lower than sample R^2
        # == Reference:
        # * Leach, L. & Henson, R. (2007). The Use and Impact of Adjusted R2 Effects in Published Regression Research. Multiple Linear Regression Viewpoints, 33(1), 1-11.
        def r2_adjusted
          r2-((1-r2)*@predictors_n).quo(df_e)
        end

        # Sum of squares (regression)
        def ssr
          r2*sst
        end

        # Sum of squares (Error)
        def sse
          sst - ssr
        end

        # T values for coeffs: each coefficient divided by its standard error
        def coeffs_t
          se=coeffs_se
          t_values={}
          coeffs.each do |field, b|
            t_values[field]=b / se[field]
          end
          t_values
        end

        # Mean square Regression
        def msr
          ssr.quo(df_r)
        end

        # Mean Square Error
        def mse
          sse.quo(df_e)
        end

        # Degrees of freedom for regression
        def df_r
          @predictors_n
        end

        # Degrees of freedom for error
        def df_e
          @valid_cases-@predictors_n-1
        end

        # Fisher for Anova
        def f
          anova.f
        end

        # p-value of Fisher
        def probability
          anova.probability
        end

        # Tolerance for a given variable: 1 - R^2 of a regression, with the
        # same engine class, of +var+ on the predictor columns.
        # http://talkstats.com/showthread.php?t=5056
        def tolerance(var)
          vectors={}
          assign_names(@dep_columns).each do |field, column|
            vectors[field]=column.to_vector(:scale)
          end
          lr=self.class.new(vectors.to_dataset,var)
          1-lr.r2
        end

        # Tolerances for each coefficient
        def coeffs_tolerances
          @fields.inject({}) do |tolerances, field|
            tolerances[field]=tolerance(field)
            tolerances
          end
        end

        # Standard Error for coefficients
        def coeffs_se
          error_ms=sse.quo(df_e)
          out={}
          coeffs.each_key do |field|
            out[field]=Math::sqrt(error_ms/(@ds[field].sum_of_squares * tolerance(field)))
          end
          out
        end

        # Estandar error of R^2
        # ????
        def se_r2
          Math::sqrt((4*r2*(1-r2)**2*(df_e)**2).quo((@cases**2-1)*(@cases+3)))
        end

        # Estimated Variance-Covariance Matrix
        # Used for calculation of se of constant.
        # Each non-negative entry of (X'X)^-1 * mse is replaced by its square
        # root; negative entries become nil.
        def estimated_variance_covariance_matrix
          predictors=@ds_valid.fields.reject { |field| field==@y_var }
          columns=predictors.collect { |field| @ds_valid[field].data }
          # First column of ones stands for the constant.
          columns.unshift([1.0]*@valid_cases)
          x=Matrix.columns(columns)
          matrix=((x.t*x)).inverse * mse
          matrix.collect { |value| Math::sqrt(value) if value>=0 }
        end

        # T for constant.
        # Returns nil when the standard error of the constant is not available.
        def constant_t
          se=constant_se
          return nil if se.nil?
          constant.to_f/se
        end

        # Standard error for constant
        def constant_se
          estimated_variance_covariance_matrix[0,0]
        end

        # Adds a section with the summary of the regression to the report builder +b+.
        def report_building(b)
          di="%0.#{digits}f"
          b.section(:name=>@name) do |g|
            c=coeffs
            g.text(_("Engine: %s") % self.class)
            g.text(_("Cases(listwise)=%d(%d)") % [@total_cases, @valid_cases])
            g.text(_("R=")+(di % r))
            g.text(_("R^2=")+(di % r2))
            g.text(_("R^2 Adj=")+(di % r2_adjusted))
            g.text(_("Std.Error R=")+ (di % se_estimate))

            terms=@fields.collect { |k| sprintf("#{di}%s",c[k],k) }.join(' + ')
            g.text(_("Equation")+"="+ sprintf(di,constant) +" + "+ terms)

            g.parse_element(anova)
            sc=standarized_coeffs
            cse=coeffs_se
            header=%w{coeff b beta se t}.collect { |field| _(field) }
            g.table(:name=>_("Beta coefficients"), :header=>header) do |t|
              # Computed once: each call rebuilds the variance-covariance matrix.
              c_se=constant_se
              c_t=constant_t
              t.row([_("Constant"), sprintf(di, constant), "-", c_se.nil? ? "" : sprintf(di, c_se), c_t.nil? ? "" : sprintf(di, c_t)])
              @fields.each do |f|
                t.row([f, sprintf(di, c[f]), sprintf(di, sc[f]), sprintf(di, cse[f]), sprintf(di, c[f].quo(cse[f]))])
              end
            end
          end
        end

        # Builds a hash that maps each name on @fields to the value of +c+
        # at the same position.
        def assign_names(c)
          named={}
          @fields.each_with_index do |field, i|
            named[field]=c[i]
          end
          named
        end

        # Sum of squares of regression
        # using the predicted value minus y mean.
        # Cases with a nil on any predictor are skipped.
        def ssr_direct
          mean=@dy.mean
          (0...@ds.cases).inject(0) do |acc, i|
            values=@dep_columns.collect { |column| column[i] }
            if values.any? { |value| value.nil? }
              acc
            else
              acc+((process(values)-mean)**2)
            end
          end
        end

        # NOTE(review): computed from +ssr+, not +ssr_direct+, so it equals
        # +sse+ - confirm whether +ssr_direct+ was intended.
        def sse_direct
          sst-ssr
        end

        # Predicted value for an array +v+ of predictor values, given in the
        # order of @fields: constant + sum of coefficient * value.
        def process(v)
          c=coeffs
          total=constant
          @fields.each_with_index do |field, i|
            total+=c[field]*v[i]
          end
          total
        end
      end
    end
  end
end

================================================
FILE: lib/statsample/regression/multiple/gslengine.rb
================================================
if Statsample.has_gsl?
  module Statsample
    module Regression
      module Multiple
        # Class for Multiple Regression Analysis
        # Requires rbgsl and uses a listwise approach.
        # Slower on prediction of values than Alglib, because predict is ruby based.
        # Better memory management on multiple (+1000) series of regression.
        # If you need pairwise, use RubyEngine
        # Example:
        #
        #   @a=[1,3,2,4,3,5,4,6,5,7].to_vector(:scale)
        #   @b=[3,3,4,4,5,5,6,6,4,4].to_vector(:scale)
        #   @c=[11,22,30,40,50,65,78,79,99,100].to_vector(:scale)
        #   @y=[3,4,5,6,7,8,9,10,20,30].to_vector(:scale)
        #   ds={'a'=>@a,'b'=>@b,'c'=>@c,'y'=>@y}.to_dataset
        #   lr=Statsample::Regression::Multiple::GslEngine.new(ds,'y')
        #
        class GslEngine < BaseEngine
          def initialize(ds,y_var, opts=Hash.new)
            super
            @ds=ds.dup_only_valid
            @ds_valid=@ds
            @valid_cases=@ds_valid.cases
            @dy=@ds[@y_var]
            @ds_indep=ds.dup(ds.fields-[y_var])
            # Create a custom matrix
            columns=[]
            @fields=[]
            max_deps = GSL::Matrix.alloc(@ds.cases, @ds.fields.size)
            constant_col=@ds.fields.size-1
            for i in 0...@ds.cases
              max_deps.set(i,constant_col,1)
            end
            j=0
            @ds.fields.each{|f|
              if f!=@y_var
                @ds[f].each_index{|i1|
                  max_deps.set(i1,j,@ds[f][i1])
                }
                columns.push(@ds[f].to_a)
                @fields.push(f)
                j+=1
              end
            }
            @dep_columns=columns.dup
            @lr_s=nil
            c, @cov, @chisq, @status = GSL::MultiFit.linear(max_deps, @dy.gsl)
            @constant=c[constant_col]
            @coeffs_a=c.to_a.slice(0...constant_col)
            @coeffs=assign_names(@coeffs_a)
            c=nil
          end

          def _dump(i)
            Marshal.dump({'ds'=>@ds,'y_var'=>@y_var})
          end
          def self._load(data)
            h=Marshal.load(data)
            self.new(h['ds'], h['y_var'])
          end

          def coeffs
            @coeffs
          end
          # Coefficients using a constant
          # Based on http://www.xycoon.com/ols1.htm
          def matrix_resolution
            columns=@dep_columns.dup.map {|xi| xi.map{|i| i.to_f}}
            columns.unshift([1.0]*@ds.cases)
            y=Matrix.columns([@dy.data.map  {|i| i.to_f}])
            x=Matrix.columns(columns)
            xt=x.t
            matrix=((xt*x)).inverse*xt
            matrix*y
          end
          def r2
            r**2
          end
          def r
            Bivariate::pearson(@dy, predicted)
          end
          def sst
            @dy.ss
          end
          def constant
            @constant
          end
          def standarized_coeffs
            l=lr_s
            l.coeffs
          end
          def lr_s
            if @lr_s.nil?
              build_standarized
            end
            @lr_s
          end
          def build_standarized
            @ds_s=@ds.standarize
            @lr_s=GslEngine.new(@ds_s,@y_var)
          end
          def process_s(v)
            lr_s.process(v)
          end
          # ???? Not equal to SPSS output
          def standarized_residuals
            res=residuals
            red_sd=residuals.sds
            res.collect {|v|
              v.quo(red_sd)
            }.to_vector(:scale)
          end

          # Standard error for coeffs
          def coeffs_se
            out={}
            evcm=estimated_variance_covariance_matrix
            @ds_valid.fields.each_with_index do |f,i|

              mi=i+1
              next if f==@y_var
              out[f]=evcm[mi,mi]
            end
            out
          end

        end
      end
    end
  end # for Statsample
end # for if


================================================
FILE: lib/statsample/regression/multiple/matrixengine.rb
================================================
module Statsample
module Regression
module Multiple
  # Pure Ruby Class for Multiple Regression Analysis, based on a covariance or correlation matrix.
  #
  # Use Statsample::Regression::Multiple::RubyEngine if you have a 
  # Dataset, to avoid setting all details.
  # 
  # <b>Remember:</b> NEVER use a covariance matrix if you have missing data. Use only a correlation matrix in that case.
  #
  # 
  # Example:
  #
  #   matrix=[[1.0, 0.5, 0.2], [0.5, 1.0, 0.7], [0.2, 0.7, 1.0]]
  #   
  #   lr=Statsample::Regression::Multiple::MatrixEngine.new(matrix,2)

class MatrixEngine < BaseEngine
  # Hash of standard deviation of predictors.
  # Only useful for Correlation Matrix, because by default is set to 1
  attr_accessor :x_sd
  # Standard deviation of criterion
  # Only useful for Correlation Matrix, because by default is set to 1
  attr_accessor :y_sd
  # Hash of mean for predictors. By default, set to 0
  attr_accessor :x_mean

  # Mean for criteria. By default, set to 0
  attr_accessor :y_mean

  # Number of cases
  attr_writer :cases
  attr_writer :digits

  # Create object
  # * matrix: covariance or correlation matrix; it is extended with
  #   Statsample::CovariateMatrix.
  # * y_var: name of the dependent variable; should be one of matrix.fields.
  # * opts: hash of options (default :digits=>3). Every key for which the
  #   object has a public writer is assigned, before the coefficients
  #   are calculated.
  # Raises LinearDependency if the determinant of the predictors
  # correlation matrix is lower than 1e-15.
  def initialize(matrix,y_var, opts=Hash.new)
    matrix.extend Statsample::CovariateMatrix
    raise "#{y_var} variable should be on data" unless matrix.fields.include? y_var
    if matrix._type==:covariance
      @matrix_cov=matrix
      @matrix_cor=matrix.correlation
      @no_covariance=false
    else
      @matrix_cor=matrix
      @matrix_cov=matrix
      @no_covariance=true
    end

    @y_var=y_var
    @fields=matrix.fields-[y_var]

    @n_predictors=@fields.size
    @predictors_n=@n_predictors
    @matrix_x= @matrix_cor.submatrix(@fields)
    @matrix_x_cov= @matrix_cov.submatrix(@fields)
    raise LinearDependency, "Regressors are linearly dependent" if @matrix_x.determinant<1e-15

    @matrix_y = @matrix_cor.submatrix(@fields, [y_var])
    @matrix_y_cov = @matrix_cov.submatrix(@fields, [y_var])

    @y_sd=Math::sqrt(@matrix_cov.submatrix([y_var])[0,0])

    @x_sd=@n_predictors.times.inject({}) { |ac,i|
      ac[@matrix_x_cov.fields[i]]=Math::sqrt(@matrix_x_cov[i,i])
      ac
    }

    @cases=nil
    @x_mean=@fields.inject({}) { |ac,f|
      ac[f]=0.0
      ac
    }

    @y_mean=0.0
    @name=_("Multiple reggresion of %s on %s") % [@fields.join(","), @y_var]

    # Options could replace the means and deviations set above, so they
    # should be applied before the coefficients are calculated.
    # The writer is tested, because the writer is what is called.
    {:digits=>3}.merge(opts).each do |key, value|
      writer="#{key}="
      send(writer, value) if respond_to?(writer)
    end
    result_matrix=@matrix_x_cov.inverse * @matrix_y_cov

    if matrix._type==:covariance
      @coeffs=result_matrix.column(0).to_a
      @coeffs_stan=coeffs.collect { |k,b|
        b*@x_sd[k].quo(@y_sd)
      }
    else
      @coeffs_stan=result_matrix.column(0).to_a
      @coeffs=standarized_coeffs.collect { |k,beta|
        beta*@y_sd.quo(@x_sd[k])
      }
    end
    @total_cases=@valid_cases=@cases
  end

  # Number of cases. Raises an error if it was not defined.
  def cases
    raise "You should define the number of valid cases first" if @cases.nil?
    @cases
  end

  # Get R^2 for the regression
  # For fixed models is the coefficient of determination.
  # On random models, is the 'squared-multiple correlation'
  # Equal to
  # * 1-(|R| / |R_x|) or
  # * Sum(b_i*r_yi) <- used
  def r2
    @n_predictors.times.inject(0) { |ac,i| ac+@coeffs_stan[i]* @matrix_y[i,0] }
  end

  # Multiple correlation, on random models.
  def r
    Math::sqrt(r2)
  end

  # Value of constant
  def constant
    c=coeffs
    @y_mean - @fields.inject(0) { |a,k| a + (c[k] * @x_mean[k]) }
  end

  # Hash of b or raw coefficients
  def coeffs
    assign_names(@coeffs)
  end

  # Hash of beta or standarized coefficients
  def standarized_coeffs
    assign_names(@coeffs_stan)
  end

  # Total sum of squares
  def sst
    @y_sd**2*(cases-1.0)
  end

  # Degrees of freedom for regression
  def df_r
    @n_predictors
  end

  # Degrees of freedom for error
  def df_e
    cases-@n_predictors-1
  end

  # Tolerance for a given variable
  # defined as (1-R^2) of regression of other independent variables
  # over the selected
  # == Reference:
  # * http://talkstats.com/showthread.php?t=5056
  def tolerance(var)
    return 1 if @matrix_x.column_size==1
    lr=Statsample::Regression::Multiple::MatrixEngine.new(@matrix_x, var)
    1-lr.r2
  end

  # Standard Error for coefficients.
  # Standard error of a coefficients depends on
  # * Tolerance of the coeffients: Higher tolerances implies higher error
  # * Higher r2 implies lower error
  # == Reference:
  # * Cohen et al. (2003). Applied Multiple Reggression / Correlation Analysis for the Behavioral Sciences
  #
  def coeffs_se
    out={}
    coeffs.each_key do |k|
      out[k]=@y_sd.quo(@x_sd[k])*Math::sqrt( 1.quo(tolerance(k)))*Math::sqrt((1-r2).quo(df_e))
    end
    out
  end

  # t value for constant
  def constant_t
    se=constant_se
    return nil if se.nil?
    constant.to_f / se
  end

  # Standard error for constant.
  # This method recreates the estimaded variance-covariance matrix
  # using means, standard deviation and covariance matrix.
  # So, needs the covariance matrix: returns nil without it.
  # Also returns nil if the variance obtained for the constant is not positive.
  def constant_se
    return nil if @no_covariance
    n=cases
    # Work on a copy: the :constant key should not appear on the public x_mean hash.
    means=@x_mean.merge(:constant=>1)
    sd=@x_sd
    fields=[:constant]+@matrix_cov.fields-[@y_var]
    # Recreate X'X using the variance-covariance matrix
    xt_x=Matrix.rows(fields.collect { |i|
      fields.collect { |j|
        if i==:constant or j==:constant
          cov=0
        elsif i==j
          cov=sd[i]**2
        else
          cov=@matrix_cov.submatrix(i..i,j..j)[0,0]
        end
        cov*(n-1)+n*means[i]*means[j]
      }
    })
    # Only the first element of the diagonal, the constant, is needed.
    variance=(xt_x.inverse * mse)[0,0]
    Math::sqrt(variance) if variance>0
  end

end
end
end
end


================================================
FILE: lib/statsample/regression/multiple/rubyengine.rb
================================================
module Statsample
module Regression
module Multiple
# Pure Ruby Class for Multiple Regression Analysis.
# Slower than AlglibEngine, but is pure ruby and can use a pairwise approach for missing values.
# Coefficient calculation uses the correlation matrix between the vectors.
# If you need a listwise approach for missing values, use AlglibEngine, because it is faster.
# 
# Example:
#
#   @a=[1,3,2,4,3,5,4,6,5,7].to_vector(:scale)
#   @b=[3,3,4,4,5,5,6,6,4,4].to_vector(:scale)
#   @c=[11,22,30,40,50,65,78,79,99,100].to_vector(:scale)
#   @y=[3,4,5,6,7,8,9,10,20,30].to_vector(:scale)
#   ds={'a'=>@a,'b'=>@b,'c'=>@c,'y'=>@y}.to_dataset
#   lr=Statsample::Regression::Multiple::RubyEngine.new(ds,'y')

class RubyEngine < MatrixEngine
  # Builds the engine from the correlation matrix of +ds+.
  # Means, standard deviations and the number of cases are taken from the
  # dataset and merged over +opts+, so they replace any value given
  # by the caller for the same keys.
  def initialize(ds,y_var, opts=Hash.new)
    matrix=ds.correlation_matrix
    fields_indep=ds.fields-[y_var]
    from_dataset={
      :y_mean=>ds[y_var].mean,
      :x_mean=>fields_indep.inject({}) { |means,f| means[f]=ds[f].mean; means },
      :y_sd=>ds[y_var].sd,
      :x_sd=>fields_indep.inject({}) { |sds,f| sds[f]=ds[f].sd; sds },
      :cases=>Statsample::Bivariate.min_n_valid(ds)
    }
    super(matrix, y_var, opts.merge(from_dataset))
    @ds=ds
    @dy=ds[@y_var]
    @ds_valid=ds.dup_only_valid
    @total_cases=@ds.cases
    @valid_cases=@ds_valid.cases
    @ds_indep = ds.dup(ds.fields-[y_var])
    set_dep_columns
  end

  # Refreshes @dep_columns with the data (nils included) of each
  # vector of the independent dataset.
  def set_dep_columns
    @dep_columns=[]
    @ds_indep.each_vector do |_name, vector|
      @dep_columns.push(vector.data_with_nils)
    end
  end

  # On rows with exactly one missing predictor, replaces the missing value
  # with the mean of that variable.
  def fix_with_mean
    row_index=0
    @ds_indep.each do |row|
      missing=[]
      row.each { |k,v| missing.push(k) if v.nil? }
      if missing.size==1
        field=missing[0]
        @ds_indep[field][row_index]=@ds[field].mean
      end
      row_index+=1
    end
    @ds_indep.update_valid_data
    set_dep_columns
  end

  # On rows with exactly one missing predictor, replaces the missing value
  # with the prediction of a regression of that variable on the others.
  # NOTE(review): the constant MultipleRegression is not defined on this
  # file - confirm that it resolves.
  def fix_with_regression
    row_index=0
    @ds_indep.each do |row|
      missing=[]
      row.each { |k,v| missing.push(k) if v.nil? }
      if missing.size==1
        field=missing[0]
        lr=MultipleRegression.new(@ds_indep,field)
        others=[]
        @ds_indep.fields.each do |f|
          others.push(row[f]) unless f==field
        end
        @ds_indep[field][row_index]=lr.process(others)
      end
      row_index+=1
    end
    @ds_indep.update_valid_data
    set_dep_columns
  end

  # Standard error for constant
  def constant_se
    estimated_variance_covariance_matrix[0,0]
  end
end
end
end
end


================================================
FILE: lib/statsample/regression/multiple.rb
================================================
require 'statsample/regression/multiple/baseengine'
module Statsample
  module Regression
    # Module for OLS Multiple Regression Analysis.
    # 
    #  Use:.
    #
    #  require 'statsample'
    #  a=1000.times.collect {rand}.to_scale
    #  b=1000.times.collect {rand}.to_scale
    #  c=1000.times.collect {rand}.to_scale
    #  ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
    #  ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+rand()}
    #  lr=Statsample::Regression.multiple(ds,'y')
    #  puts lr.summary
    #  Summary for regression of a,b,c over y
    #  *************************************************************
    #  Engine: Statsample::Regression::Multiple::AlglibEngine
    #  Cases(listwise)=1000(1000)
    #  r=0.986
    #  r2=0.973
    #  Equation=0.504+5.011a + 2.995b + 1.988c
    #  ----------------------------
    #  ANOVA TABLE
    #  --------------------------------------------------------------
    #  |  source     | ss       | df  | ms      | f         | s     |
    #  --------------------------------------------------------------
    #  |  Regression | 2979.321 | 3   | 993.107 | 12040.067 | 0.000 |
    #  |  Error      | 82.154   | 996 | 0.082   |           |       |
    #  |  Total      | 3061.475 | 999 |         |           |       |
    #  --------------------------------------------------------------
    #  Beta coefficientes
    #  -----------------------------------------------
    #  |  coeff    | b     | beta  | se    | t       |
    #  -----------------------------------------------
    #  |  Constant | 0.504 | -     | 0.030 | 16.968  |
    #  |  a        | 5.011 | 0.832 | 0.031 | 159.486 |
    #  |  b        | 2.995 | 0.492 | 0.032 | 94.367  |
    #  |  c        | 1.988 | 0.323 | 0.032 | 62.132  |
    #  -----------------------------------------------
    # 
    module Multiple
      # Obtain r2 for regressors.
      # * rxx: matrix of the regressors
      # * rxy: one-column matrix relating regressors and criterion
      # Returns the only element of rxy' * rxx^-1 * rxy
      def self.r2_from_matrices(rxx,rxy)
        product=rxy.transpose*rxx.inverse*rxy
        product[0,0]
      end

      # Regression with more than one dependent variable, based on a matrix.
      class MultipleDependent
        # Always returns 0.0
        def significance
          0.0
        end

        # * matrix: matrix, extended here with Statsample::CovariateMatrix
        # * y_var: array with the names of the dependent variables
        # * opts: not used
        def initialize(matrix,y_var, opts=Hash.new)
          matrix.extend Statsample::CovariateMatrix
          @matrix=matrix
          @fields=matrix.fields-y_var
          @y_var=y_var
          @q=@y_var.size
          @matrix_cor=matrix.correlation
          @matrix_cor_xx = @matrix_cor.submatrix(@fields)
          @matrix_cor_yy = @matrix_cor.submatrix(y_var, y_var)

          @sxx = @matrix.submatrix(@fields)
          @syy = @matrix.submatrix(y_var, y_var)
          @sxy = @matrix.submatrix(@fields, y_var)
          @syx = @sxy.t
        end

        # 1 - |R| / (|R_yy| * |R_xx|), using the correlation matrices
        def r2yx
          denominator=@matrix_cor_yy.determinant * @matrix_cor_xx.determinant
          1 - @matrix_cor.determinant.quo(denominator)
        end

        # Residual covariance of Y after accounting for its linear relation with X
        def syyx
          explained=@syx*@sxx.inverse*@sxy
          @syy-explained
        end

        # 1 - |S_yy.x| / |S_yy|
        def r2yx_covariance
          ratio=syyx.determinant.quo(@syy.determinant)
          1-ratio
        end

        # q - trace(S_yy^-1 * S_yy.x), where q is the number of dependent variables
        def vxy
          @q-(@syy.inverse*syyx).trace
        end

        # vxy divided by the number of dependent variables
        def p2yx
          vxy.quo(@q)
        end
      end

    end
  end
end


================================================
FILE: lib/statsample/regression/simple.rb
================================================
module Statsample
  module Regression
    # Class for calculation of linear regressions with form
    #   y = a+bx
    # To create a Statsample::Regression::Simple object:
    # * <tt> Statsample::Regression::Simple.new_from_dataset(ds,x,y)</tt>
    # * <tt> Statsample::Regression::Simple.new_from_vectors(vx,vy)</tt>
    # * <tt> Statsample::Regression::Simple.new_from_gsl(gsl) </tt>
    #
    class Simple
      include Summarizable
      attr_accessor :a,:b,:cov00, :cov01, :covx1, :chisq, :status
      attr_accessor :name
      attr_accessor :digits

      # Calls the private initializer named +init_method+ with the
      # remaining arguments. Use the new_from_* class methods instead.
      def initialize(init_method, *argv)
        self.send(init_method, *argv)
      end
      private_class_method :new

      # Obtain y value given x value
      # y=a+bx
      def y(val_x)
        @a+@b*val_x
      end

      # Obtain x value given y value
      # x=(y-a)/b
      def x(val_y)
        (val_y-@a) / @b.to_f
      end

      # Sum of square error
      def sse
        (0...@vx.size).inject(0) { |acum,i| acum+((@vy[i]-y(@vx[i]))**2) }
      end

      # Square root of sse divided by (n-2)
      def standard_error
        Math::sqrt(sse / (@vx.size-2).to_f)
      end

      # Sum of square regression
      def ssr
        vy_mean=@vy.mean
        (0...@vx.size).inject(0) { |acum,i| acum+((y(@vx[i])-vy_mean)**2) }
      end

      # Sum of square total
      def sst
        @vy.sum_of_squared_deviation
      end

      # Value of r
      def r
        @b * (@vx.sds / @vy.sds)
      end

      # Value of r^2
      def r2
        r**2
      end

      class << self
        # Create a regression object giving an array with following parameters:
        # <tt>a,b,cov00, cov01, covx1, chisq, status</tt>
        # Useful to obtain x and y values with a and b values.
        def new_from_gsl(ar)
          new(:init_gsl, *ar)
        end

        # Create a simple regression using two vectors
        def new_from_vectors(vx,vy, opts=Hash.new)
          new(:init_vectors,vx,vy, opts)
        end

        # Create a simple regression using a dataset and two vector names.
        def new_from_dataset(ds,x,y, opts=Hash.new)
          new(:init_vectors,ds[x],ds[y], opts)
        end
      end

      # Calculates a and b by least squares, using only the cases valid on
      # both vectors, and applies the options (defaults: :digits=>3 and a
      # name based on the names of the vectors).
      def init_vectors(vx,vy, opts=Hash.new)
        @vx,@vy=Statsample.only_valid_clone(vx,vy)
        x_m=@vx.mean
        y_m=@vy.mean
        num=den=0
        (0...@vx.size).each do |i|
          dev_x=@vx[i]-x_m
          num+=dev_x*(@vy[i]-y_m)
          den+=dev_x**2
        end
        @b=num.to_f/den
        @a=y_m - @b*x_m

        opts_default={
          :digits=>3,
          :name=>_("Regression of %s over %s") % [@vx.name, @vy.name]
        }
        @opts=opts_default.merge opts

        # The writer is tested, because the writer is what is called:
        # a key like :r, with reader only, should be ignored.
        @opts.each do |key, value|
          writer="#{key}="
          send(writer, value) if respond_to?(writer)
        end
      end

      # Stores the values of a fit calculated elsewhere. No vectors are
      # stored, so only the methods based on a and b are available.
      def init_gsl(a,b,cov00, cov01, covx1, chisq, status)
        @a=a
        @b=b
        @cov00=cov00
        @cov01=cov01
        @covx1=covx1
        @chisq=chisq
        @status=status
      end

      # Adds a section with a table of r, r^2, a, b and standard error to
      # the report builder +gen+.
      def report_building(gen)
        f="%0.#{digits}f"
        gen.section(:name=>name) do |s|
          s.table(:header=>[_("Variable"), _("Value")]) do |t|
            t.row [_("r"), f % r]
            t.row [_("r^2"), f % r2]
            t.row [_("a"), f % a]
            t.row [_("b"), f % b]
            t.row [_("s.e"), f % standard_error]
          end
        end
      end
      private :init_vectors, :init_gsl
    end
  end
end


================================================
FILE: lib/statsample/regression.rb
================================================
require 'statsample/regression/simple'
require 'statsample/regression/multiple'

require 'statsample/regression/multiple/matrixengine'
require 'statsample/regression/multiple/rubyengine'
require 'statsample/regression/multiple/gslengine'

module Statsample
    # = Module for regression procedures.
    # Use the methods of this module to generate
    # analysis.
    # If you need more control, you can
    # create and control directly the objects that compute
    # the regressions.
    #
    # * Simple Regression :  Statsample::Regression::Simple
    # * Multiple Regression: Statsample::Regression::Multiple
    # * Logit Regression:    Statsample::Regression::Binomial::Logit
    # * Probit Regression:    Statsample::Regression::Binomial::Probit
    module Regression

      # Raised by MatrixEngine when the regressors are linearly dependent.
      # It descends directly from Exception, so a bare +rescue+ does not catch it.
      LinearDependency=Class.new(Exception)

      # Create a Statsample::Regression::Simple object, for simple regression
      # * x: independent Vector
      # * y: dependent Vector
      # * opts: hash of options, passed to Simple.new_from_vectors
      # <b>Usage:</b>
      #   x=100.times.collect {|i| rand(100)}.to_scale
      #   y=100.times.collect {|i| 2+x[i]*2+rand()}.to_scale
      #   sr=Statsample::Regression.simple(x,y)
      #   sr.a
      #   => 2.51763295177808
      #   sr.b
      #   => 1.99973746599856
      #   sr.r
      #   => 0.999987881153254
      def self.simple(x,y, opts=Hash.new)
        Statsample::Regression::Simple.new_from_vectors(x,y, opts)
      end

      # Creates one of the Statsample::Regression::Multiple object,
      # for OLS multiple regression.
      # Parameters:
      # * <tt>ds</tt>: Dataset.
      # * y: Name of dependent variable.
      # * opts: A hash with options. The hash given is not modified.
      #   * missing_data: Could be
      #     * :listwise: delete cases with one or more empty data (default).
      #     * :pairwise: uses correlation matrix. Use with caution.
      #
      # <b>Usage:</b>
      #   lr=Statsample::Regression::multiple(ds,'y')
      def self.multiple(ds,y_var, opts=Hash.new)
        # Work on a copy, so the hash of the caller keeps :missing_data.
        opts=opts.dup
        missing_data=opts[:missing_data].nil? ? :listwise : opts.delete(:missing_data)
        if missing_data==:pairwise
          Statsample::Regression::Multiple::RubyEngine.new(ds,y_var, opts)
        elsif Statsample.has_gsl? and false
          # Never reached: the GSL engine is disabled by the 'and false'.
          Statsample::Regression::Multiple::GslEngine.new(ds, y_var, opts)
        else
          Statsample::Regression::Multiple::RubyEngine.new(ds.dup_only_valid,y_var, opts)
        end
      end
    end
end


================================================
FILE: lib/statsample/reliability/icc.rb
================================================
module Statsample
  module Reliability
    # = Intra-class correlation
    # According to Shrout & Fleiss (1979, p.422): "ICC is the correlation 
    # between one measurement (either a single rating or a mean of 
    # several ratings) on a target and another measurement obtained on that target"
    # == Usage
    #   require 'statsample'
    #   size=1000
    #   a = size.times.map {rand(10)}.to_scale
    #   b = a.recode{|i|i+rand(4)-2}
    #   c  =a.recode{|i|i+rand(4)-2}
    #   d = a.recode{|i|i+rand(4)-2}
    #   ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
    #   # Use :type attribute to set type to summarize
    #   icc=Statsample::Reliability::ICC.new(ds, :type=>:icc_1_k)
    #   puts icc.summary
    # 
    # == Reference
    # * Shrout,P. & Fleiss, J. (1979). Intraclass Correlation: Uses in assessing rater reliability. Psychological Bulletin, 86(2), 420-428
    # * McGraw, K. & Wong, S.P. (1996). Forming Inferences About Some Intraclass Correlation Coefficients. Psychological methods, 1(1), 30-46.

    class ICC
      include Summarizable
      
      # Create a ICC analysis for a given dataset
      # Each vector is a different measurement. Only uses complete data 
      # (listwise deletion).
      #
      
      attr_reader :df_bt
      attr_reader :df_wt
      attr_reader :df_bj
      attr_reader :df_residual

      attr_reader :ms_bt
      attr_reader :ms_wt
      attr_reader :ms_bj
      attr_reader :ms_residual

      alias :bms :ms_bt
      alias :wms :ms_wt
      alias :jms :ms_bj
      alias :ems :ms_residual
      
      alias :msr :ms_bt
      alias :msw :ms_wt
      alias :msc :ms_bj
      alias :mse :ms_residual
      
      # :section: Shrout and Fleiss ICC denominations
      attr_reader :icc_1_1
      attr_reader :icc_2_1
      attr_reader :icc_3_1
      attr_reader :icc_1_k
      attr_reader :icc_2_k
      attr_reader :icc_3_k

      # :section: McGraw and Wong ICC denominations
      
      attr_reader :icc_1
      attr_reader :icc_c_1
      attr_reader :icc_a_1
      attr_reader :icc_k
      attr_reader :icc_c_k
      attr_reader :icc_a_k
      
      
      attr_reader :n, :k
      attr_reader :total_mean
      # Type of analysis, for easy summarization
      # By default, set to :icc_1
      # * Shrout & Fleiss(1979) denominations
      #   * :icc_1_1
      #   * :icc_2_1
      #   * :icc_3_1
      #   * :icc_1_k
      #   * :icc_2_k
      #   * :icc_3_k
      # * McGraw & Wong (1996) denominations
      #   * :icc_1
      #   * :icc_k
      #   * :icc_c_1
      #   * :icc_c_k
      #   * :icc_a_1
      #   * :icc_a_k

      attr_reader :type
      # ICC value, set with :type
      attr_reader :r
      attr_reader :f
      attr_reader :lbound
      attr_reader :ubound
      
      attr_accessor :g_rho
      attr_accessor :alpha
      attr_accessor :name
      # Builds the analysis from +ds+, keeping only the cases returned by
      # +dup_only_valid+, and computes every coefficient right away.
      #
      # +opts+ may set any writable attribute. Defaults are
      # <tt>:name=>"Intra-class correlation"</tt> and <tt>:type=>:icc_1</tt>.
      def initialize(ds, opts=Hash.new)
        @ds=ds.dup_only_valid
        @vectors=@ds.vectors.values
        @n=@ds.cases
        @k=@ds.fields.size
        compute
        @g_rho=0
        @alpha=0.05
        @icc_name=nil
        opts_default={:name=>"Intra-class correlation", :type=>:icc_1}
        @opts=opts_default.merge(opts)
        # type= derives the F test and the confidence interval from @alpha
        # and @g_rho, so every other option has to be in place first.
        @opts.each{|key,value|
          next if key==:type
          self.send("#{key}=",value) if self.respond_to? key
        }
        self.type=@opts[:type]
      end
      # Selects which coefficient is summarized. Sets the label, the
      # coefficient (+r+), its F test (+f+) and the confidence bounds
      # (+lbound+, +ubound+) from the current +alpha+ and +g_rho+.
      #
      # Raises RuntimeError for an unknown type; in that case the previous
      # selection is left as it was.
      def type=(v)
        case v
          # Shrout & Fleiss denominations: F tests use the default rho.
          when :icc_1_1
            @icc_name=_("Shrout & Fleiss ICC(1,1)")
            @r=@icc_1_1
            @f=icc_1_f
            @lbound, @ubound=icc_1_1_ci(@alpha)
          when :icc_2_1
            @icc_name=_("Shrout & Fleiss ICC(2,1)")
            @r=@icc_2_1
            @f=icc_2_f
            @lbound, @ubound=icc_2_1_ci(@alpha)
          when :icc_3_1
            @icc_name=_("Shrout & Fleiss ICC(3,1)")
            @r=@icc_3_1
            @f=icc_3_f
            @lbound, @ubound=icc_3_1_ci(@alpha)
          when :icc_1_k
            @icc_name=_("Shrout & Fleiss ICC(1,k)")
            @r=@icc_1_k
            @f=icc_1_k_f
            @lbound, @ubound=icc_1_k_ci(@alpha)
          when :icc_2_k
            @icc_name=_("Shrout & Fleiss ICC(2,k)")
            @r=@icc_2_k
            @f=icc_2_k_f
            @lbound, @ubound=icc_2_k_ci(@alpha)
          when :icc_3_k
            @icc_name=_("Shrout & Fleiss ICC(3,k)")
            @r=@icc_3_k
            @f=icc_3_k_f
            @lbound, @ubound=icc_3_k_ci(@alpha)
          # McGraw & Wong denominations: F tests are evaluated at @g_rho.
          when :icc_1
            @icc_name=_("McGraw & Wong ICC(1)")
            @r=@icc_1_1
            @f=icc_1_f(@g_rho)
            @lbound, @ubound=icc_1_1_ci(@alpha)
          when :icc_k
            @icc_name=_("McGraw & Wong ICC(K)")
            @r=@icc_1_k
            @f=icc_1_k_f(@g_rho)
            @lbound, @ubound=icc_1_k_ci(@alpha)
          when :icc_c_1
            @icc_name=_("McGraw & Wong ICC(C,1)")
            @r=@icc_3_1
            @f=icc_c_1_f(@g_rho)
            @lbound, @ubound=icc_3_1_ci(@alpha)
          when :icc_c_k
            @icc_name=_("McGraw & Wong ICC(C,K)")
            @r=@icc_3_k
            @f=icc_c_k_f(@g_rho)
            @lbound, @ubound=icc_c_k_ci(@alpha)
          when :icc_a_1
            @icc_name=_("McGraw & Wong ICC(A,1)")
            @r=@icc_2_1
            @f=icc_a_1_f(@g_rho)
            @lbound,@ubound = icc_2_1_ci(@alpha)
          when :icc_a_k
            @icc_name=_("McGraw & Wong ICC(A,K)")
            @r=@icc_2_k
            @f=icc_a_k_f(@g_rho)
            @lbound,@ubound=icc_2_k_ci(@alpha)
          else
            raise "Type #{v} doesn't exists" 
        end
        # Remember the selection so the +type+ reader reports it.
        @type=v
      end
      # Fills in degrees of freedom, sums of squares, mean squares and the
      # six coefficients from the dataset.
      # NOTE(review): the bt / wt / bj suffixes presumably stand for
      # between-targets, within-targets and between-judges (Shrout & Fleiss
      # BMS, WMS, JMS) - confirm.
      def compute
        cases=n
        judges=k

        # Degrees of freedom
        @df_bt=cases-1
        @df_wt=cases*(judges-1)
        @df_bj=judges-1
        @df_residual=(cases-1)*(judges-1)

        grand_total=@vectors.inject(0){|acc,vec| acc+vec.sum}
        @total_mean=grand_total.quo(cases*judges)
        case_means=@ds.vector_mean

        @ss_bt=judges*case_means.ss(@total_mean)
        @ms_bt=@ss_bt.quo(@df_bt)

        squared_vector_deviations=@vectors.inject(0){|acc,vec| acc+(vec.mean-@total_mean).square}
        @ss_bj=cases*squared_vector_deviations
        @ms_bj=@ss_bj.quo(@df_bj)

        @ss_wt=@vectors.inject(0){|acc,vec| acc+(vec-case_means).ss(0)}
        @ms_wt=@ss_wt.quo(@df_wt)

        # Residual = within minus the part attributed to the vectors
        @ss_residual=@ss_wt-@ss_bj
        @ms_residual=@ss_residual.quo(@df_residual)

        between=@ms_bt
        within=@ms_wt
        judge=@ms_bj
        error=@ms_residual

        # Single-measure coefficients
        # ICC(1,1) / ICC(1)
        @icc_1_1=(between-within).quo(between+(judges-1)*within)
        # ICC(2,1) / ICC(A,1)
        @icc_2_1=(between-error).quo(between+(judges-1)*error+judges*(judge - error).quo(cases))
        # ICC(3,1) / ICC(C,1)
        @icc_3_1=(between-error).quo(between+(judges-1)*error)

        # Average-measure coefficients
        # ICC(1,K) / ICC(K)
        @icc_1_k=(between-within).quo(between)
        # ICC(2,K) / ICC(A,k)
        @icc_2_k=(between-error).quo(between+(judge-error).quo(cases))
        # ICC(3,K) / ICC(C,k) = Cronbach's alpha
        @icc_3_k=(between-error).quo(between)
      end
      
      # F test for ICC(1) against the null value +rho+,
      # with df_bt and df_wt degrees of freedom.
      def icc_1_f(rho=0.0)
        Statsample::Test::F.new(
          msr*(1-rho),
          msw*(1+(k-1)*rho),
          @df_bt,
          @df_wt
        )
      end
      # One way random F, type k
      def icc_1_k_f(rho=0)
        # Only the numerator depends on the null value rho.
        scaled_msr=msr*(1-rho)
        Statsample::Test::F.new(scaled_msr, msw, @df_bt, @df_wt)
      end
      
      # F test for ICC(C,1) against the null value +rho+,
      # with df_bt and df_residual degrees of freedom.
      def icc_c_1_f(rho=0)
        Statsample::Test::F.new(
          msr*(1-rho),
          mse*(1+(k-1)*rho),
          @df_bt,
          @df_residual
        )
      end
      # F test for ICC(C,K) against the null value +rho+; it is built from
      # the coefficient itself rather than from the mean squares.
      def icc_c_k_f(rho=0)
        null_part=1-rho
        observed_part=1-@icc_3_k
        Statsample::Test::F.new(null_part, observed_part, @df_bt, @df_residual)
      end
      
      # Denominator degrees of freedom for a weighted sum of msc and mse:
      #
      #   (a*msc + b*mse)^2 / ( (a*msc)^2/(k-1) + (b*mse)^2/((n-1)(k-1)) )
      #
      # Both squares are parenthesised: `x**2.quo(y)` parses as
      # `x**(2.quo(y))`, which is not the intended squared term over y.
      def v(a,b)
        weighted_msc=a*msc
        weighted_mse=b*mse
        numerator=(weighted_msc+weighted_mse)**2
        denominator=(weighted_msc**2).quo(k-1)+(weighted_mse**2).quo((n-1)*(k-1))
        numerator.quo(denominator)
      end
      # Weight applied to msc in the ICC(A,1) F-test denominator.
      def a(rho)
        numerator=k*rho
        denominator=n*(1-rho)
        numerator.quo(denominator)
      end
      # Weight applied to mse in the ICC(A,1) F-test denominator.
      def b(rho)
        extra=(k*rho*(n-1)).quo(n*(1-rho))
        1+extra
      end
      # Weight applied to msc in the ICC(A,K) F-test denominator.
      def c(rho)
        denominator=n*(1-rho)
        rho.quo(denominator)
      end
      # Weight applied to mse in the ICC(A,K) F-test denominator.
      def d(rho)
        extra=(rho*(n-1)).quo(n*(1-rho))
        1+extra
      end
      private :v, :a, :b, :c, :d
      # F test for ICC(A,1) against the null value +rho+. The denominator
      # degrees of freedom are derived from the observed ICC(2,1).
      def icc_a_1_f(rho=0)
        judge_f=jms.quo(ems)
        observed=@icc_2_1
        denominator=a(rho)*msc+b(rho)*mse
        df_num=(k-1)*(n-1)*((k*observed*judge_f+n*(1+(k-1)*observed)-k*observed)**2)
        df_den=(n-1)*(k**2)*(observed**2)*(judge_f**2)+((n*(1+(k-1)*observed)-k*observed)**2)
        Statsample::Test::F.new(msr, denominator, @df_bt, df_num.quo(df_den))
      end
      
      # F test for ICC(A,K) against the null value +rho+. The denominator
      # degrees of freedom are derived from the observed ICC(2,k).
      def icc_a_k_f(rho=0)
        denominator=c(rho)*msc+d(rho)*mse
        judge_f=jms.quo(ems)
        observed=@icc_2_k
        df_num=(k-1)*(n-1)*((k*observed*judge_f+n*(1+(k-1)*observed)-k*observed)**2)
        df_den=(n-1)*(k**2)*(observed**2)*(judge_f**2)+((n*(1+(k-1)*observed)-k*observed)**2)
        Statsample::Test::F.new(msr, denominator, @df_bt, df_num.quo(df_den))
      end
      
      # F test for ICC Case 1. Shrout and Fleiss
      def icc_1_f_shrout
        # bms over wms, with df_bt and df_wt degrees of freedom.
        numerator=bms
        denominator=wms
        Statsample::Test::F.new(numerator, denominator, @df_bt, @df_wt)
      end

      # Confidence interval for ICC(1,1)
      def icc_1_1_ci(alpha=0.05)
        # Two-sided interval: each tail takes alpha/2.
        upper_p=1-(0.5*alpha)
        f_upper=icc_1_f.f*Distribution::F.p_value(upper_p, @df_wt, @df_bt)
        f_lower=icc_1_f.f.quo(Distribution::F.p_value(upper_p, @df_bt, @df_wt))
        lower_bound=(f_lower-1).quo(f_lower+k-1)
        upper_bound=(f_upper-1).quo(f_upper+k-1)
        [lower_bound, upper_bound]
      end
      
      # Confidence interval for ICC(1,k)
      def icc_1_k_ci(alpha=0.05)
        # Two-sided interval: each tail takes alpha/2.
        upper_p=1-(0.5*alpha)
        f_upper=icc_1_f.f*Distribution::F.p_value(upper_p, @df_wt, @df_bt)
        f_lower=icc_1_f.f.quo(Distribution::F.p_value(upper_p, @df_bt, @df_wt))
        lower_bound=1-(1.quo(f_lower))
        upper_bound=1-(1.quo(f_upper))
        [lower_bound, upper_bound]
      end
      
      # F test for ICC Case 2
      def icc_2_f
        # bms over ems, with df_bt and df_residual degrees of freedom.
        numerator=bms
        denominator=ems
        Statsample::Test::F.new(numerator, denominator, @df_bt, @df_residual)
      end
      
      
      #
      # F* for ICC(2,1) and ICC(2,k)
      # 
      def icc_2_1_fs(pp,alpha=0.05)
        # pp is the coefficient the degrees of freedom are derived from.
        judge_f=jms.quo(ems)
        upper_p=1-(0.5*alpha)
        df_num=(k-1)*(n-1)*((k*pp*judge_f+n*(1+(k-1)*pp)-k*pp)**2)
        df_den=(n-1)*(k**2)*(pp**2)*(judge_f**2)+((n*(1+(k-1)*pp)-k*pp)**2)
        df_v=df_num.quo(df_den)
        # Same probability, degrees of freedom swapped.
        [
          Distribution::F.p_value(upper_p, n-1, df_v),
          Distribution::F.p_value(upper_p, df_v, n-1)
        ]
      end
     
      
      # Confidence interval for ICC(2,1), computed with the McGraw & Wong
      # method. Returns [lower, upper] for significance level +alpha+.
      def icc_2_1_ci(alpha=0.05)
        # alpha has to be forwarded; otherwise the interval is always
        # computed at the default level.
        icc_2_1_ci_mcgraw(alpha)
      end
      
      # Confidence interval for ICC(A,1), McGraw & Wong
      
      def icc_2_1_ci_mcgraw(alpha=0.05)
        f_low,f_up=icc_2_1_fs(icc_2_1,alpha)
        lower_num=n*(msr-f_low*mse)
        lower_den=f_low*(k*msc+(k*n-k-n)*mse)+n*msr
        upper_num=n*(f_up*msr-mse)
        upper_den=k*msc+(k*n-k-n)*mse+n*f_up*msr
        [lower_num.quo(lower_den), upper_num.quo(upper_den)]
      end
      
      # Confidence interval for ICC(2,k); delegates to the McGraw & Wong
      # variant with the same +alpha+.
      def icc_2_k_ci(alpha=0.05)
        bounds=icc_2_k_ci_mcgraw(alpha)
        bounds
      end
      
      # Confidence interval for ICC(2,k) / ICC(A,k), McGraw & Wong method.
      # Returns [lower, upper].
      def icc_2_k_ci_mcgraw(alpha=0.05)
        f_low,f_up=icc_2_1_fs(icc_2_k,alpha)
        lower_bound=(n*(msr-f_low*mse)).quo(f_low*(msc-mse)+n*msr)
        upper_bound=(n*(f_up*msr-mse)).quo(msc-mse+n*f_up*msr)
        [lower_bound, upper_bound]
      end
      # Confidence interval for ICC(2,k) obtained by stepping up each bound
      # of the ICC(2,1) interval to k measurements.
      def icc_2_k_ci_shrout(alpha=0.05)
        single=icc_2_1_ci(alpha)
        lower=single[0]
        upper=single[1]
        [
          (lower*k).quo(1+(k-1)*lower),
          (upper*k).quo(1+(k-1)*upper)
        ]
      end
      
      
      # F test for ICC Case 3: bms over ems, with df_bt and df_residual
      # degrees of freedom.
      def icc_3_f
        numerator=bms
        denominator=ems
        Statsample::Test::F.new(numerator, denominator, @df_bt, @df_residual)
      end
      
      # Confidence interval for ICC(3,1). Returns [lower, upper].
      def icc_3_1_ci(alpha=0.05)
        upper_p=1-(0.5*alpha)
        f_lower=icc_3_f.f.quo(Distribution::F.p_value(upper_p, @df_bt, @df_residual))
        f_upper=icc_3_f.f*Distribution::F.p_value(upper_p, @df_residual, @df_bt)
        lower_bound=(f_lower-1).quo(f_lower+k-1)
        upper_bound=(f_upper-1).quo(f_upper+k-1)
        [lower_bound, upper_bound]
      end
      
      # Confidence interval for ICC(3,k). Returns [lower, upper].
      def icc_3_k_ci(alpha=0.05)
        upper_p=1-(0.5*alpha)
        f_lower=icc_3_f.f.quo(Distribution::F.p_value(upper_p, @df_bt, @df_residual))
        f_upper=icc_3_f.f*Distribution::F.p_value(upper_p, @df_residual, @df_bt)
        lower_bound=1-(1.quo(f_lower))
        upper_bound=1-(1.quo(f_upper))
        [lower_bound, upper_bound]
      end
      
      # Confidence interval for ICC(C,K), based on the icc_c_k_f statistic.
      # Returns [lower, upper].
      def icc_c_k_ci(alpha=0.05)
        upper_p=1-(0.5*alpha)
        f_lower=icc_c_k_f.f.quo(Distribution::F.p_value(upper_p, @df_bt, @df_residual))
        f_upper=icc_c_k_f.f*Distribution::F.p_value(upper_p, @df_residual, @df_bt)
        lower_bound=1-(1.quo(f_lower))
        upper_bound=1-(1.quo(f_upper))
        [lower_bound, upper_bound]
      end
      # Writes one section with the selected coefficient, its F test and
      # its confidence interval.
      def report_building(b)
        b.section(:name=>name) do |section|
          section.text(@icc_name)
          coefficient_line=_("ICC: %0.4f") % @r
          section.text(coefficient_line)
          section.parse_element(@f)
          confidence=(1-@alpha)*100
          interval_line=_("CI (%0.2f): [%0.4f - %0.4f]") % [confidence, @lbound, @ubound]
          section.text(interval_line)
        end
      end
    end
  end
end


================================================
FILE: lib/statsample/reliability/multiscaleanalysis.rb
================================================
module Statsample
  module Reliability
    # DSL for analysis of multiple scales analysis. 
    # Retrieves reliability analysis for each scale and
    # provides fast accessors to correlations matrix,
    # PCA and Factor Analysis.
    # 
    # == Usage
    #  @x1=[1,1,1,1,2,2,2,2,3,3,3,30].to_vector(:scale)
    #  @x2=[1,1,1,2,2,3,3,3,3,4,4,50].to_vector(:scale)
    #  @x3=[2,2,1,1,1,2,2,2,3,4,5,40].to_vector(:scale)
    #  @x4=[1,2,3,4,4,4,4,3,4,4,5,30].to_vector(:scale)
    #  ds={'x1'=>@x1,'x2'=>@x2,'x3'=>@x3,'x4'=>@x4}.to_dataset
    #  opts={:name=>"Scales", # Name of analysis
    #        :summary_correlation_matrix=>true, # Add correlation matrix
    #        :summary_pca=>true } # Add PCA between scales
    #  msa=Statsample::Reliability::MultiScaleAnalysis.new(opts) do |m|
    #    m.scale :s1, ds.clone(%w{x1 x2})
    #    m.scale :s2, ds.clone(%w{x3 x4}), {:name=>"Scale 2"}
    #  end
    #  # Retrieve summary
    #  puts msa.summary 
    class MultiScaleAnalysis
      include Statsample::Summarizable
      # Hash with scales
      attr_reader :scales
      # Name of analysis
      attr_accessor :name
      # Add a correlation matrix on summary
      attr_accessor :summary_correlation_matrix
      # Add PCA to summary
      attr_accessor :summary_pca
      # Add Principal Axis to summary
      attr_accessor :summary_principal_axis
      # Options for Factor::PCA object
      attr_accessor :pca_options
      # Options for Factor::PrincipalAxis 
      attr_accessor :principal_axis_options
      
      # Add Parallel Analysis to summary
      attr_accessor :summary_parallel_analysis
      # Options for Parallel Analysis
      attr_accessor :parallel_analysis_options
      
      # Add MAP to summary
      attr_accessor :summary_map
      # Options for MAP
      attr_accessor :map_options
      
      
      # Generates a new MultiScaleAnalysis
      # Opts could be any accessor of the class 
      # * :name, 
      # * :summary_correlation_matrix
      # * :summary_pca
      # * :summary_principal_axis
      # * :summary_map
      # * :pca_options
      # * :principal_axis_options
      # * :map_options
      # If block given, all methods should be called
      # inside object environment.
      # 
      def initialize(opts=Hash.new, &block)
        @scales=Hash.new
        @scales_keys=Array.new
        # Every summary is off by default and every engine gets empty options.
        defaults={
          :name=>_("Multiple Scale analysis"),
          :summary_correlation_matrix=>false,
          :summary_pca=>false,
          :summary_principal_axis=>false,
          :summary_parallel_analysis=>false,
          :summary_map=>false,
          :pca_options=>Hash.new,
          :principal_axis_options=>Hash.new,
          :parallel_analysis_options=>Hash.new,
          :map_options=>Hash.new
        }
        @opts=defaults.merge(opts)
        @opts.each do |key,value|
          next unless self.respond_to? key
          self.send("#{key}=",value)
        end

        if block
          # A block without parameters runs inside the object; otherwise
          # the object is handed to the block.
          if block.arity<1
            instance_eval(&block)
          else
            block.call(self)
          end
        end
      end
      # Add or retrieve a scale to analysis.
      # If second parameters is a dataset, generates a ScaleAnalysis 
      # for <tt>ds</tt>, named <tt>code</tt> with options <tt>opts</tt>.
      # 
      # If second parameters is empty, returns the ScaleAnalysis
      # <tt>code</tt>.
      def scale(code, ds=nil, opts=nil)
        # Reader form: no dataset given.
        return @scales[code] if ds.nil?
        opts={:name=>_("Scale %s") % code} if opts.nil?
        # Redefining a scale replaces its analysis; its code must not be
        # listed twice, because the list is used as the field list of the
        # dataset built from the scales.
        @scales_keys.push(code) unless @scales_keys.include?(code)
        @scales[code]=ScaleAnalysis.new(ds, opts)
      end
      # Delete ScaleAnalysis named <tt>code</tt>
      def delete_scale(code)
        @scales_keys.delete(code)
        # The removed analysis (or nil) is the return value.
        removed=@scales.delete(code)
        removed
      end
      # Retrieves a Principal Component Analysis (Factor::PCA)
      # using all scales, with <tt>opts</tt> as options.
      def pca(opts=nil)
        # Fall back to the configured pca_options when none are given.
        options=opts || pca_options
        Statsample::Factor::PCA.new(correlation_matrix, options)
      end
      # Retrieve Velicer's MAP
      # using all scales.
      def map(opts=nil)
        # Fall back to the configured map_options when none are given.
        options=opts || map_options
        Statsample::Factor::MAP.new(correlation_matrix, options)
      end
      # Retrieves a PrincipalAxis Analysis (Factor::PrincipalAxis)
      # using all scales, using <tt>opts</tt> a options.
      def principal_axis_analysis(opts=nil)
        # Fall back to the configured principal_axis_options when none are given.
        options=opts || principal_axis_options
        Statsample::Factor::PrincipalAxis.new(correlation_matrix, options)
      end
      # Builds a dataset with one vector per scale: the sum of the scale's
      # items, named after the scale.
      def dataset_from_scales
        summed=Dataset.new(@scales_keys)
        @scales.each do |code,scale|
          field=code.to_s
          summed[field]=scale.ds.vector_sum
          summed[field].name=scale.name
        end
        summed.update_valid_data
        summed
      end
      # Retrieves a Parallel Analysis (Factor::ParallelAnalysis) over the
      # dataset built from the scales.
      def parallel_analysis(opts=nil)
        options=opts || parallel_analysis_options
        Statsample::Factor::ParallelAnalysis.new(dataset_from_scales, options)
      end
      # Retrieves a Correlation Matrix between scales.
      # 
      def correlation_matrix
        # Rebuilt from the scales on every call.
        scales_dataset=dataset_from_scales
        Statsample::Bivariate.correlation_matrix(scales_dataset)
      end
      def report_building(b) # :nodoc:
        b.section(:name=>name) do |s|
          # Reliability analysis of every scale comes first.
          s.section(:name=>_("Reliability analysis of scales")) do |scales_section|
            @scales.each_pair do |code, scale|
              scales_section.parse_element(scale)
            end
          end

          # Each optional analysis gets its own subsection when its
          # summary_* flag is set; the element is built only on demand.
          add_optional=lambda do |enabled, title, element_builder|
            if enabled
              s.section(:name=>title % name) do |sub|
                sub.parse_element(element_builder.call)
              end
            end
          end

          add_optional.call(summary_correlation_matrix,
                            _("Correlation matrix for %s"),
                            lambda { correlation_matrix })
          add_optional.call(summary_pca,
                            _("PCA for %s"),
                            lambda { pca })
          add_optional.call(summary_principal_axis,
                            _("Principal Axis for %s"),
                            lambda { principal_axis_analysis })
          add_optional.call(summary_parallel_analysis,
                            _("Parallel Analysis for %s"),
                            lambda { parallel_analysis })
          add_optional.call(summary_map,
                            _("MAP for %s"),
                            lambda { map })
        end
      end
    end
  end
end

================================================
FILE: lib/statsample/reliability/scaleanalysis.rb
================================================
module Statsample
  module Reliability
    # Analysis of a Scale. Analoge of Scale Reliability analysis on SPSS.
    # Returns several statistics for complete scale and each item
    # == Usage
    #  @x1=[1,1,1,1,2,2,2,2,3,3,3,30].to_vector(:scale)
    #  @x2=[1,1,1,2,2,3,3,3,3,4,4,50].to_vector(:scale)
    #  @x3=[2,2,1,1,1,2,2,2,3,4,5,40].to_vector(:scale)
    #  @x4=[1,2,3,4,4,4,4,3,4,4,5,30].to_vector(:scale)
    #  ds={'x1'=>@x1,'x2'=>@x2,'x3'=>@x3,'x4'=>@x4}.to_dataset
    #  ia=Statsample::Reliability::ScaleAnalysis.new(ds)
    #  puts ia.summary
    class ScaleAnalysis
      include Summarizable
      attr_reader :ds,:mean, :sd,:valid_n, :alpha , :alpha_standarized, :variances_mean, :covariances_mean, :cov_m
      attr_accessor :name
      attr_accessor :summary_histogram
      # Builds the analysis for +ds+. Items with zero variance are set
      # aside and the statistics are computed on the remaining items, using
      # the cases returned by +dup_only_valid+.
      #
      # +opts+ may set :name and :summary_histogram.
      def initialize(ds, opts=Hash.new)
        # Constant items are excluded from the analysed dataset.
        @dumped=ds.fields.find_all {|field| ds[field].variance==0 }

        @ods=ds
        kept_fields=ds.fields - @dumped
        @ds=ds.dup_only_valid(kept_fields)
        @ds.name=ds.name

        @k=@ds.fields.size
        @total=@ds.vector_sum
        # Totals over the original items are only needed for the report
        # on dropped items.
        @o_total=(@dumped.size > 0) ? @ods.vector_sum : nil

        # Statistics of the per-case item mean
        @vector_mean=@ds.vector_mean
        @item_mean=@vector_mean.mean
        @item_sd=@vector_mean.sd

        # Statistics of the per-case total
        @mean=@total.mean
        @median=@total.median
        @skew=@total.skew
        @kurtosis=@total.kurtosis
        @sd = @total.sd
        @variance=@total.variance
        @valid_n = @total.size

        defaults={
          :name=>_("Reliability Analysis"),
          :summary_histogram=>true
        }
        @opts=defaults.merge(opts)
        @opts.each do |key,value|
          self.send("#{key}=",value) if self.respond_to? key
        end

        @cov_m=Statsample::Bivariate.covariance_matrix(@ds)
        # Mean for covariances and variances
        diagonal=@k.times.map {|i| @cov_m[i,i]}
        @variances=diagonal.to_scale
        @variances_mean=@variances.mean
        @covariances_mean=(@variance-@variances.sum).quo(@k**2-@k)
        @alpha = Statsample::Reliability.cronbach_alpha(@ds)
        @alpha_standarized = Statsample::Reliability.cronbach_alpha_standarized(@ds)
      end
      # Returns a hash with the structure
      # <tt>{field => {total score => mean value of the item among the cases with that total}}</tt>
      def item_characteristic_curve
        sums={}
        counts={}
        case_index=0
        # First pass: accumulate, per item and per total score, the sum of
        # the item values and the number of cases.
        @ds.each do |row|
          score=@total[case_index]
          @ds.fields.each do |field|
            sums[field]||={}
            counts[field]||={}
            sums[field][score]=(sums[field][score] || 0)+row[field]
            counts[field][score]=(counts[field][score] || 0)+1
          end
          case_index+=1
        end
        # Second pass: turn the sums into means.
        counts.each do |field,per_score|
          per_score.each_key do |score|
            sums[field][score]=sums[field][score].quo(per_score[score])
          end
        end
        sums
      end
      # =Adjusted R.P.B. for each item
      # Adjusted RPB(Point biserial-correlation) for each item
      #
      def item_total_correlation
        return @itc if @itc
        correlations={}
        @ds.fields.each do |field|
          item=@ds[field].clone
          # Total of the remaining items, so the item does not correlate
          # with itself.
          rest=@ds.clone
          rest.delete_vector(field)
          correlations[field]=Statsample::Bivariate.pearson(item,rest.vector_sum)
        end
        @itc=correlations
      end
      # Mean of the item-total correlations.
      def mean_rpb
        correlations=item_total_correlation.values
        correlations.to_scale.mean
      end
      # Returns (and memoizes) a hash <tt>{field => {:mean, :sds}}</tt>;
      # :sds is the square root of the item variance taken from the
      # covariance matrix.
      def item_statistics
        return @is if @is
        stats={}
        @ds.fields.each do |field|
          stats[field]={
            :mean=>@ds[field].mean,
            :sds=>Math::sqrt(@cov_m.variance(field))
          }
        end
        @is=stats
      end
      # Returns a dataset with cases ordered by score
      # and variables ordered by difficulty

      def item_difficulty_analysis
        # Items ordered from highest to lowest mean.
        item_means={}
        @ds.fields.each{|field| item_means[field]=@ds[field].mean }
        items_by_mean=item_means.sort{|x,y| -(x[1]<=>y[1])}

        # Cases ordered from lowest to highest mean score.
        case_scores={}
        scores=@ds.vector_mean
        scores.each_index{|idx| case_scores[idx]=scores[idx] }
        cases_by_score=case_scores.sort{|x,y| x[1]<=>y[1]}

        header=['case','score'] + items_by_mean.collect{|field,mean| field}
        ordered=Statsample::Dataset.new(header)
        cases_by_score.each do |idx,score|
          values=@ds.case_as_hash(idx)
          row=[idx, score]
          items_by_mean.each{|field,mean| row.push(values[field]) }
          ordered.add_case_array(row)
        end
        ordered.update_valid_data
        ordered
      end
      
      # Memoized access to the "if item deleted" statistics.
      def stats_if_deleted
        @sif=stats_if_deleted_intern unless @sif
        @sif
      end
      
      def stats_if_deleted_intern # :nodoc:
        fields=@ds.fields
        # Nothing is left once the only item is removed.
        return Hash.new if fields.size==1
        result={}
        fields.each do |field|
          # Covariance matrix of the scale without this item
          reduced=@cov_m.submatrix(@ds.fields-[field])
          entry={}
          entry[:mean]=@mean-item_statistics[field][:mean]
          entry[:variance_sample]=reduced.total_sum
          entry[:sds]=Math::sqrt(entry[:variance_sample])
          # Alpha needs at least two remaining items.
          if reduced.row_size>=2
            entry[:alpha]=Statsample::Reliability.cronbach_alpha_from_covariance_matrix(reduced)
          else
            entry[:alpha]=nil
          end
          result[field]=entry
        end
        result
      end
      # Writes the reliability report: an optional subsection about the
      # items dropped for having zero variance, the scale summary table, the
      # number of items needed for alpha 0.8 / 0.9, the per-item table and,
      # when +summary_histogram+ is set, the histogram of the totals.
      def report_building(builder) #:nodoc:
        builder.section(:name=>@name) do |s|
          if @dumped.size>0
            # Everything about the dropped items goes into the subsection
            # itself (s1), not into the enclosing section.
            s.section(:name=>"Items with variance=0") do |s1|
              s1.table(:name=>_("Summary for %s with all items") % @name) do |t|
                t.row [_("Items"), @ods.fields.size]
                t.row [_("Sum mean"),     "%0.4f" % @o_total.mean]
                t.row [_("S.d. mean"),     "%0.4f" % @o_total.sd]
              end
              s1.table(:name=>_("Deleted items"), :header=>['item','mean']) do |t|
                @dumped.each do |f|
                  t.row(["#{@ods[f].name}(#{f})", "%0.5f" % @ods[f].mean])
                end
              end
              s1.parse_element(Statsample::Graph::Histogram.new(@o_total, :name=>"Histogram (complete data) for %s" % @name)) if @summary_histogram
            end
          end

          s.table(:name=>_("Summary for %s") % @name) do |t|
            t.row [_("Valid Items"), @ds.fields.size]
            t.row [_("Valid cases"), @valid_n]
            t.row [_("Sum mean"),     "%0.4f" % @mean]
            t.row [_("Sum sd"),       "%0.4f" % @sd  ]
            t.row [_("Sum median"),   @median]
            t.hr
            t.row [_("Item mean"),    "%0.4f" % @item_mean]
            t.row [_("Item sd"),    "%0.4f" % @item_sd]
            t.hr
            t.row [_("Skewness"),     "%0.4f" % @skew]
            t.row [_("Kurtosis"),     "%0.4f" % @kurtosis]
            t.hr
            t.row [_("Cronbach's alpha"), @alpha ? ("%0.4f" % @alpha) : "--"]
            t.row [_("Standarized Cronbach's alpha"), @alpha_standarized ? ("%0.4f" % @alpha_standarized) : "--" ]
            t.row [_("Mean rpb"), "%0.4f" % mean_rpb]
            t.row [_("Variances mean"),  "%g" % @variances_mean]
            t.row [_("Covariances mean") , "%g" % @covariances_mean]
          end

          if (@alpha)
            # Translate the template first, then interpolate: formatting
            # before the lookup yields a msgid that can never match.
            s.text _("Items for obtain alpha(0.8) : %d") % Statsample::Reliability.n_for_desired_reliability(@alpha, 0.8, @ds.fields.size)
            s.text _("Items for obtain alpha(0.9) : %d") % Statsample::Reliability.n_for_desired_reliability(@alpha, 0.9, @ds.fields.size)
          end

          sid=stats_if_deleted
          is=item_statistics
          itc=item_total_correlation

          s.table(:name=>_("Items report for %s") % @name, :header=>["item","mean","sd", "mean if deleted", "var if deleted", "sd if deleted"," item-total correl.", "alpha if deleted"]) do |t|
            @ds.fields.each do |f|
              row=["#{@ds[f].name}(#{f})"]
              # Missing statistics are rendered as dashes.
              if is[f]
                row+=[sprintf("%0.5f",is[f][:mean]), sprintf("%0.5f", is[f][:sds])]
              else
                row+=["-","-"]
              end
              if sid[f]
                row+= [sprintf("%0.5f",sid[f][:mean]), sprintf("%0.5f",sid[f][:variance_sample]), sprintf("%0.5f",sid[f][:sds])]
              else
                row+=%w{- - -}
              end
              if itc[f]
                row+= [sprintf("%0.5f",itc[f])]
              else
                row+=['-']
              end
              if sid[f] and !sid[f][:alpha].nil?
                row+=[sprintf("%0.5f",sid[f][:alpha])]
              else
                row+=["-"]
              end
              t.row row
            end # end each
          end # table
          s.parse_element(Statsample::Graph::Histogram.new(@total, :name=>"Histogram (valid data) for %s" % @name)) if @summary_histogram
        end # section
      end # def
    end # class
  end # module
end # module


================================================
FILE: lib/statsample/reliability/skillscaleanalysis.rb
================================================
module Statsample
  module Reliability
    # Analysis of a Skill Scale
    # Given a dataset with results and a correct answers hash,
    # generates a ScaleAnalysis 
    # == Usage
    #  x1=%w{a b b c}.to_vector
    #  x2=%w{b a b c}.to_vector
    #  x3=%w{a c b a}.to_vector
    #  ds={'x1'=>x1,'x2'=>x2,'x3'=>x3}.to_dataset
    #  key={'x1'=>'a','x2'=>'b','x3'=>'a'}    
    #  ssa=Statsample::Reliability::SkillScaleAnalysis.new(ds,key)
    #  puts ssa.summary
    class SkillScaleAnalysis
      include Summarizable
      attr_accessor :name
      attr_accessor :summary_minimal_item_correlation
      attr_accessor :summary_show_problematic_items
      # +ds+ holds the raw answers and +key+ maps each item to its correct
      # answer. +opts+ may set :name, :summary_minimal_item_correlation and
      # :summary_show_problematic_items.
      def initialize(ds,key,opts=Hash.new)
        @ds=ds
        @key=key
        defaults={
          :name=>_("Skill Scale Reliability Analysis (%s)") % ds.name,
          :summary_minimal_item_correlation=>0.10,
          :summary_show_problematic_items=>true
        }
        @opts=defaults.merge(opts)
        @opts.each do |option,value|
          self.send("#{option}=",value) if self.respond_to? option
        end
        # The corrected dataset is built lazily.
        @cds=nil
      end
      # Dataset only corrected vectors
      def corrected_dataset_minimal
        corrected=corrected_dataset
        # Keep only the vectors named in the answer key.
        selected={}
        @key.keys.each {|item| selected[item]=corrected[item] }
        minimal=selected.to_dataset
        @key.keys.each do |item|
          original=@ds[item]
          minimal[item].name=_("%s(corrected)") % original.name
          minimal[item].labels=@ds[item].labels
        end
        minimal.name=_("Corrected dataset from %s") % @ds.name
        minimal
      end
      # Per-case sum over the corrected items.
      def vector_sum
        minimal=corrected_dataset_minimal
        minimal.vector_sum
      end
      # Per-case mean over the corrected items.
      def vector_mean
        minimal=corrected_dataset_minimal
        minimal.vector_mean
      end
      # Builds a ScaleAnalysis over the corrected items, named after this
      # analysis.
      def scale_analysis
        analysis=ScaleAnalysis.new(corrected_dataset_minimal)
        analysis.name=_("%s (Scale Analysis)") % @name
        analysis
      end
      # Returns (and memoizes) a copy of the dataset in which every keyed
      # item is recoded to 1 (matches the key), 0 (does not match) or nil
      # (invalid value). Fields outside the key are copied unchanged.
      def corrected_dataset
        return @cds unless @cds.nil?
        @cds=@ds.dup_empty
        @key.keys.each do |item|
          @cds[item].type=:scale
          @cds[item].name=@ds[item].name
        end
        @ds.each do |row|
          corrected={}
          row.each do |field,answer|
            corrected[field]=
              if !@key.keys.include?(field)
                answer
              elsif @ds[field].is_valid?(answer)
                @key[field]==answer ? 1 : 0
              else
                nil
              end
          end
          # Valid data is refreshed once, after every case is in.
          @cds.add_case(corrected,false)
        end
        @cds.update_valid_data
        @cds
      end
      # Writes the scale analysis of the corrected items and, when
      # +summary_show_problematic_items+ is set, one subsection for each
      # item whose item-total correlation is below
      # +summary_minimal_item_correlation+.
      def report_building(builder)
        builder.section(:name=>@name) do |s|
          sa=scale_analysis
          s.parse_element(sa)
          if summary_show_problematic_items
            s.section(:name=>_("Problematic Items")) do |spi|
              count=0
              sa.item_total_correlation.each do |item,correlation|
                if correlation < summary_minimal_item_correlation
                  count+=1
                  spi.section(:name=>_("Item: %s") % @ds[item].name) do |spii|
                    spii.text _("Correct answer: %s") % @key[item]
                    spii.text _("p: %0.3f") % corrected_dataset[item].mean
                    props=@ds[item].proportions.inject({}) {|ac,pair| ac[pair[0]] = pair[1].to_f;ac}
                    # The table belongs to the item's own section, next to
                    # its texts, not to the enclosing list.
                    spii.table(:name=>"Proportions",:header=>[_("Value"), _("%")]) do |table|
                      props.each do |value,proportion|
                        table.row [ @ds[item].labeling(value), "%0.3f" % proportion]
                      end
                    end
                  end
                end
              end
              spi.text _("No problematic items") if count==0
            end
          end
        end
      end
    end    
  end
end


================================================
FILE: lib/statsample/reliability.rb
================================================
module Statsample
  module Reliability
    class << self
      # Calculate Cronbach's alpha for a given dataset.
      # Only uses tuples without missing data.
      # Returns nil if any vector has zero variance or if the dataset
      # has fewer than two fields.
      def cronbach_alpha(ods)
        complete = ods.dup_only_valid
        return nil if complete.vectors.any? { |_name, vector| vector.variance == 0 }
        k = complete.fields.size
        return nil if k <= 1
        item_variances = complete.vectors.inject(0) { |sum, pair| sum + pair[1].variance }
        scale_variance = complete.vector_sum.variance
        k.quo(k - 1) * (1 - item_variances.quo(scale_variance))
      end

      # Calculate Cronbach's alpha for a given dataset
      # using standardized values for every vector.
      # Only uses tuples without missing data.
      # Returns nil if one or more vectors have zero variance.
      def cronbach_alpha_standarized(ods)
        complete = ods.dup_only_valid
        return nil if complete.vectors.any? { |_name, vector| vector.variance == 0 }
        standardized = {}
        # The standardized vectors are taken from the original dataset.
        complete.fields.each { |field| standardized[field] = ods[field].standarized }
        cronbach_alpha(standardized.to_dataset)
      end

      # Predicted reliability of a test by replicating
      # +n+ times the number of items.
      def spearman_brown_prophecy(r,n)
        numerator = n * r
        denominator = 1 + (n - 1) * r
        numerator.quo(denominator)
      end

      alias :sbp :spearman_brown_prophecy

      # Returns the number of items needed
      # to obtain +r_d+ desired reliability
      # from +r+ current reliability, achieved with
      # +n+ items. Returns nil when +r+ is nil.
      def n_for_desired_reliability(r,r_d,n=1)
        return nil if r.nil?
        ratio = (r_d * (1 - r)).quo(r * (1 - r_d))
        ratio * n
      end

      # Get Cronbach's alpha from <tt>n</tt> items,
      # <tt>s2</tt> mean variance and <tt>cov</tt>
      # mean covariance.
      def cronbach_alpha_from_n_s2_cov(n,s2,cov)
        variance_share = s2.quo(s2 + (n - 1) * cov)
        n.quo(n - 1) * (1 - variance_share)
      end

      # Get Cronbach's alpha from a covariance matrix.
      # Raises if the matrix has fewer than 2 rows.
      def cronbach_alpha_from_covariance_matrix(cov)
        n = cov.row_size
        raise "covariance matrix should have at least 2 variables" if n < 2
        diagonal_sum = (0...n).inject(0) { |sum, i| sum + cov[i, i] }
        n.quo(n - 1) * (1 - diagonal_sum.quo(cov.total_sum))
      end

      # Returns n necessary to obtain specific alpha
      # given variance and covariance mean of items.
      # Bisects between 2 and 1000 items, starting from 50, and stops when
      # alpha is within 0.0001 of the target or n no longer changes.
      def n_for_desired_alpha(alpha,s2,cov)
        lower = 2
        upper = 1000
        epsilon = 0.0001
        n = 50
        last_n = 0
        gap = cronbach_alpha_from_n_s2_cov(n, s2, cov) - alpha
        while gap.abs > epsilon && n != last_n
          last_n = n
          if gap < 0
            # Alpha too low: move up, halfway towards the upper bound.
            lower = n
            n = (n + (upper - lower).quo(2)).to_i
          else
            # Alpha too high: move down, halfway towards the lower bound.
            upper = n
            n = (n - (upper - lower).quo(2)).to_i
          end
          gap = cronbach_alpha_from_n_s2_cov(n, s2, cov) - alpha
        end
        n
      end

      # First derivative for alpha
      # Parameters
      # <tt>n</tt>: Number of items
      # <tt>sx</tt>: mean of variances
      # <tt>sxy</tt>: mean of covariances
      def alpha_first_derivative(n,sx,sxy)
        base = (sxy * (n - 1)) + sx
        (sxy * (sx - sxy)).quo(base**2)
      end

      # Second derivative for alpha
      # Parameters
      # <tt>n</tt>: Number of items
      # <tt>sx</tt>: mean of variances
      # <tt>sxy</tt>: mean of covariances
      def alfa_second_derivative(n,sx,sxy)
        base = (sxy * (n - 1)) + sx
        (2 * (sxy**2) * (sxy - sx)).quo(base**3)
      end
    end
    # Tabulates, for every total score, how often each answer was given
    # to each field of a dataset.
    class ItemCharacteristicCurve
      # totals: total score => number of cases with that score
      # counts: field => total score => answer (as String) => frequency
      attr_reader :totals, :counts, :vector_total
      # +ds+ is the dataset of items; +vector_total+ holds the total score of
      # each case and defaults to <tt>ds.vector_sum</tt>.
      # Raises ArgumentError when its size differs from <tt>ds.cases</tt>.
      def initialize(ds, vector_total=nil)
        vector_total ||= ds.vector_sum
        unless vector_total.size == ds.cases
          raise ArgumentError, "Total size != Dataset size"
        end
        @vector_total = vector_total
        @ds = ds
        @totals = {}
        @counts = {}
        @ds.fields.each { |field| @counts[field] = {} }
        process
      end
      # Walks the dataset once, filling @totals and @counts.
      def process
        row_index = 0
        @ds.each do |row|
          total = @vector_total[row_index]
          @totals[total] = (@totals[total] || 0) + 1
          @ds.fields.each do |field|
            answer = row[field].to_s
            by_answer = (@counts[field][total] ||= {})
            by_answer[answer] = (by_answer[answer] || 0) + 1
          end
          row_index += 1
        end
      end
      # Return a hash with p for each different total score: the proportion
      # of cases with that total that answered +item+ on +field+.
      def curve_field(field, item)
        answer = item.to_s
        @totals.inject({}) do |curve, (total, n)|
          hits = @counts[field][total][answer]
          curve[total] = (hits.nil? ? 0 : hits).quo(n)
          curve
        end
      end # def
    end # ItemCharacteristicCurve
  end # Reliability
end # Statsample

require 'statsample/reliability/icc.rb'
require 'statsample/reliability/scaleanalysis.rb'
require 'statsample/reliability/skillscaleanalysis.rb'
require 'statsample/reliability/multiscaleanalysis.rb'

================================================
FILE: lib/statsample/resample.rb
================================================
module Statsample
  module Resample
    class << self
      # Calls the block +times+ times and returns an array with the
      # result of every call, in order.
      def repeat_and_save(times,&action)
        (1..times).map { action.call }
      end

      # Returns a :scale Vector with +size+ values, each computed as
      # <tt>rand(upper-low+1)+low</tt>; for integer bounds every value
      # falls in low..upper.
      def generate(size,low,upper)
        span = upper - low + 1
        draws = (0...size).map { rand(span) + low }
        Vector.new(draws, :scale)
      end

    end
  end
end


================================================
FILE: lib/statsample/rserve_extension.rb
================================================
# Several additions to Statsample objects, to support
# rserve-client

module Statsample
  class Vector
    # Wraps <tt>data_with_nils</tt> into an Rserve REXP.
    def to_REXP
      Rserve::REXP::Wrapper.wrap(data_with_nils)
    end
  end
  class Dataset
    # Builds an Rserve data frame: one wrapped column per field, in
    # @fields order, named after the field.
    def to_REXP
      columns = @fields.map do |field|
        Rserve::REXP::Wrapper.wrap(@vectors[field].data_with_nils)
      end
      Rserve::REXP.create_data_frame(Rserve::Rlist.new(columns, @fields))
    end
  end
end

================================================
FILE: lib/statsample/shorthand.rb
================================================
# Reopens Object so the shorthand below can be called from any object.
class Object
  # Shorthand for Statsample::Analysis.store(*args,&block).
  # All arguments and the block are forwarded unchanged, and the value
  # returned by Statsample::Analysis.store is returned.
  def ss_analysis(*args,&block)
    Statsample::Analysis.store(*args,&block)
  end
end

module Statsample
  # Module which provide shorthands for many methods.
  module Shorthand
    ###
    # :section: R like methods
    ###
    # Reads +filename+ with <tt>klass.read</tt>, optionally caching the
    # result next to the source file as <tt>filename + ".ds"</tt>.
    #
    # * klass: reader that responds to <tt>read(filename)</tt>
    # * filename: path of the file to import
    # * opts: accepted for signature compatibility; not passed to the reader
    # * cache: when true, a cache file strictly newer than +filename+ is
    #   loaded with Statsample.load instead of re-reading the source, and a
    #   freshly read dataset is saved to the cache file
    #
    # Returns the dataset.
    def read_with_cache(klass, filename,opts=Hash.new, cache=true)
      file_ds = filename + ".ds"
      # File.exist? is used because File.exists? was removed in Ruby 3.2.
      cache_is_fresh = cache && File.exist?(file_ds) &&
                       File.mtime(file_ds) > File.mtime(filename)
      if cache_is_fresh
        ds = Statsample.load(file_ds)
      else
        ds = klass.read(filename)
        ds.save(file_ds) if cache
      end
      ds
    end
    # Import an Excel file. Cache result by default.
    # Delegates to read_with_cache with Statsample::Excel as the reader,
    # forwarding +filename+, +opts+ and +cache+ unchanged.
    def read_excel(filename, opts=Hash.new, cache=true)
      read_with_cache(Statsample::Excel, filename, opts, cache)

    end
    # Import a CSV file. Cache result by default.
    # Delegates to read_with_cache with Statsample::CSV as the reader,
    # forwarding +filename+, +opts+ and +cache+ unchanged.
    # The parameter list mirrors read_excel; it used to be empty, which made
    # every call fail with NameError on +filename+.
    def read_csv(filename, opts=Hash.new, cache=true)
      read_with_cache(Statsample::CSV, filename, opts, cache)
    end
    
    # Retrieve names (fields) from dataset
    def names(ds)
      ds.fields
    end
    # Create a correlation matrix from a dataset
    def cor(ds)
      Statsample::Bivariate.correlation_matrix(ds)
    end
    # Create a variance/covariance matrix from a dataset
    # NOTE(review): this calls Bivariate.covariate_matrix, not
    # covariance_matrix -- confirm that method exists in Statsample::Bivariate.
    def cov(ds)
      Statsample::Bivariate.covariate_matrix(ds)
    end
    # Create a Statsample::Vector
    # Analog to R's c
    def vector(*args)
      Statsample::Vector[*args]
    end
    # Random generation for the normal distribution.
    # Returns a scale vector of +n+ values, each taken from a
    # Distribution::Normal generator built with +mean+ and +sd+.
    def rnorm(n,mean=0,sd=1)
      rng=Distribution::Normal.rng(mean,sd)
      Statsample::Vector.new_scale(n) { rng.call}
    end
    # Creates a new Statsample::Dataset
    # Each key is transformed into string
    def dataset(vectors=Hash.new)
      vectors=vectors.inject({}) {|ac,v| ac[v[0].to_s]=v[1];ac}
      Statsample::Dataset.new(vectors)
    end
    alias :data_frame :dataset
    # Returns a Statsample::Graph::Boxplot
    def boxplot(*args)
      Statsample::Graph::Boxplot.new(*args)
    end
    # Returns a Statsample::Graph::Histogram
    def histogram(*args)
      Statsample::Graph::Histogram.new(*args)
    end
    
    # Returns a Statsample::Graph::Scatterplot
    def scatterplot(*args)
      Statsample::Graph::Scatterplot.new(*args)
    end
    # Returns a Statsample::Test::Levene
    def levene(*args)
      Statsample::Test::Levene.new(*args)
    end
    # Returns a Statsample::Factor::PrincipalAxis
    def principal_axis(*args)
      Statsample::Factor::PrincipalAxis.new(*args)
      
    end
    # Returns a Statsample::Bivariate::Polychoric
    def polychoric(*args)
      Statsample::Bivariate::Polychoric.new(*args)
    end
    # Returns a Statsample::Bivariate::Tetrachoric
    def tetrachoric(*args)
      Statsample::Bivariate::Tetrachoric.new(*args)
    end

    ###
    # Other Shortcuts
    ###
    # Shorthand for Statsample::Regression.multiple(*args)
    def lr(*args)
      Statsample::Regression.multiple(*args)
    end
    # Returns a Statsample::Factor::PCA for +ds+, built with +opts+
    def pca(ds,opts=Hash.new)
      Statsample::Factor::PCA.new(ds,opts)
    end
    # Returns a Statsample::DominanceAnalysis
    def dominance_analysis(*args)
      Statsample::DominanceAnalysis.new(*args)
    end
    # Returns a Statsample::DominanceAnalysis::Bootstrap
    def dominance_analysis_bootstrap(*args)
      Statsample::DominanceAnalysis::Bootstrap.new(*args)
    end
    # Returns a Statsample::Reliability::ScaleAnalysis
    def scale_analysis(*args)
      Statsample::Reliability::ScaleAnalysis.new(*args)
    end
    # Returns a Statsample::Reliability::SkillScaleAnalysis
    def skill_scale_analysis(*args)
      Statsample::Reliability::SkillScaleAnalysis.new(*args)
    end
    # Returns a Statsample::Reliability::MultiScaleAnalysis; the block is
    # forwarded to its constructor
    def multiscale_analysis(*args,&block)
      Statsample::Reliability::MultiScaleAnalysis.new(*args,&block)
    end
    # Returns a Statsample::Test::UMannWhitney
    def test_u(*args)
      Statsample::Test::UMannWhitney.new(*args)
    end
    # test_u and rnorm are also callable directly on the module.
    module_function :test_u, :rnorm
  end
end


================================================
FILE: lib/statsample/srs.rb
================================================
module Statsample
  # Several methods to estimate parameters for simple random sampling
  # == Reference:
  # * Cochran, W.(1972). Sampling Techniques [spanish edition].
  # * http://stattrek.com/Lesson6/SRS.aspx
  module SRS
    class << self
      ########################
      #
      # :SECTION: Proportion estimation
      #
      # Function for estimation of proportions
      ########################

      #
      # Finite population correction (over variance)
      # Source: Cochran(1972)
      # * sam: sample size
      # * pop: population size
      def fpc_var(sam,pop)
        (pop - sam).quo(pop - 1)
      end

      # Finite population correction (over standard deviation):
      # the square root of (pop-sam)/(pop-1).
      def fpc(sam,pop)
        Math::sqrt((pop - sam).quo(pop - 1))
      end

      # Non sample fraction.
      #
      # 1 - sample fraction
      def qf(sam , pop)
        1 - (sam.quo(pop))
      end

      # Sample size estimation for proportions, infinite population.
      # * d: accepted error
      # * prop: expected proportion
      # * margin: confidence level (default 0.95)
      def estimation_n0(d,prop,margin=0.95)
        t = Distribution::Normal.p_value(1 - (1 - margin).quo(2))
        var = prop * (1 - prop)
        t**2 * var.quo(d**2)
      end

      # Sample size estimation for proportions, finite population.
      # Corrects estimation_n0 for a population of +n_pobl+ cases.
      def estimation_n(d,prop,n_pobl,margin=0.95)
        n0 = estimation_n0(d, prop, margin)
        n0.quo(1 + ((n0 - 1).quo(n_pobl)))
      end

      # Proportion confidence interval with t values
      # Uses estimated proportion, sample without replacement.
      def proportion_confidence_interval_t(prop, n_sample, n_population, margin=0.95)
        t = Distribution::T.p_value(1 - ((1 - margin).quo(2)), n_sample - 1)
        proportion_confidence_interval(prop, n_sample, n_population, t)
      end

      # Proportion confidence interval with z values
      # Uses estimated proportion, sample without replacement.
      def proportion_confidence_interval_z(p, n_sample, n_population, margin=0.95)
        z = Distribution::Normal.p_value(1 - ((1 - margin).quo(2)))
        proportion_confidence_interval(p, n_sample, n_population, z)
      end

      # Proportion confidence interval with x value
      # Uses estimated proportion, sample without replacement.
      # The half width is x times the estimated standard error,
      # plus 1/(2*sam).
      # Returns [lower, upper].
      def proportion_confidence_interval(p, sam,pop , x)
        one_range = x * Math::sqrt((qf(sam, pop) * p * (1 - p)).quo(sam - 1)) + (1.quo(sam * 2.0))
        [p - one_range, p + one_range]
      end

      # Standard deviation for sample distribution of a proportion
      # Known proportion, sample with replacement.
      # Based on http://stattrek.com/Lesson6/SRS.aspx
      def proportion_sd_kp_wr(p, n_sample)
        Math::sqrt(p * (1 - p).quo(n_sample))
      end

      # Standard deviation for sample distribution of a proportion
      # Known proportion, sample without replacement.
      #
      # Sources:
      # * Cochran(1972)
      def proportion_sd_kp_wor(p, sam, pop)
        fpc(sam, pop) * Math::sqrt(p * (1 - p).quo(sam))
      end

      # Standard deviation for sample distribution of a proportion
      # Estimated proportion, sample with replacement
      # Based on http://stattrek.com/Lesson6/SRS.aspx.
      def proportion_sd_ep_wr(p, n_sample)
        Math::sqrt(p * (1 - p).quo(n_sample - 1))
      end

      # Standard deviation for sample distribution of a proportion.
      # Estimated proportion, sample without replacement.
      # Reference:
      # * Cochran, 1972, Técnicas de muestreo
      def proportion_sd_ep_wor(p, sam,pop)
        fsc = (pop - sam).quo((sam - 1) * pop)
        Math::sqrt(fsc * p * (1 - p))
      end

      # Total estimation sd based on sample.
      # Known proportion, sample without replacement
      # Reference:
      # * Cochran(1972)
      #
      # Returns +pop+ times proportion_sd_kp_wor(prop, sam, pop).
      def proportion_total_sd_kp_wor(prop, sam, pop)
        # The body used to reference the undefined names +pob+ and +p+,
        # so every call raised NameError.
        pop * proportion_sd_kp_wor(prop, sam, pop)
      end

      # Total estimation sd based on sample.
      # Estimated proportion, sample without replacement
      # Source: Cochran(1972)
      def proportion_total_sd_ep_wor(prop, sam, pop)
        fsc = ((pop - sam).to_f / (sam - 1))
        Math::sqrt(fsc * pop * prop * (1 - prop))
      end

      ########################
      #
      # :SECTION:  Mean estimation
      #
      ########################

      # Standard error. Known variance, sample with replacement.
      def standard_error_ksd_wr(s, sam, pop)
        s.quo(Math::sqrt(sam)) * Math::sqrt((pop - 1).quo(pop))
      end

      # Standard error of the mean. Known variance, sample w/o replacement
      def standard_error_ksd_wor(s,sam,pop)
        s.quo(Math::sqrt(sam)) * Math::sqrt(qf(sam, pop))
      end

      alias_method :standard_error_esd_wr, :standard_error_ksd_wr

      # Standard error of the mean.
      # Estimated variance, without replacement
      # Cochran (1972) p.47
      def standard_error_esd_wor(s,sam,pop)
        s.quo(Math::sqrt(sam)) * Math::sqrt(qf(sam, pop))
      end

      alias_method :standard_error, :standard_error_esd_wor
      alias_method :se, :standard_error_esd_wor

      # Standard error of total estimation:
      # +pop+ times the standard error of the mean.
      def standard_error_total(s,sam,pop)
        pop * se(s, sam, pop)
      end

      # Confidence Interval using T-Student
      # Use with n < 60
      def mean_confidence_interval_t(mean,s,n_sample,n_population,margin=0.95)
        t = Distribution::T.p_value(1 - ((1 - margin) / 2), n_sample - 1)
        mean_confidence_interval(mean, s, n_sample, n_population, t)
      end

      # Confidence Interval using Z
      # Use with n > 60
      def mean_confidence_interval_z(mean,s,n_sample,n_population,margin=0.95)
        z = Distribution::Normal.p_value(1 - ((1 - margin) / 2))
        mean_confidence_interval(mean, s, n_sample, n_population, z)
      end

      # Confidence interval using X.
      #
      # Better use mean_confidence_interval_z or mean_confidence_interval_t
      # Returns [lower, upper].
      def mean_confidence_interval(mean,s,n_sample,n_population,x)
        range = x * se(s, n_sample, n_population)
        [mean - range, mean + range]
      end
    end
  end
end


================================================
FILE: lib/statsample/test/bartlettsphericity.rb
================================================
module Statsample
  module Test
    # == Bartlett's test of Sphericity.
    # Test the hyphotesis that the sample correlation matrix
    # comes from a multivariate normal population where variables
    # are independent. In other words, the population correlation
    # matrix is the identity matrix.
    # == Reference
    # * Dziuban, C., & Shirkey E. (1974). When is a correlation matrix appropriate for factor analysis? Some decision rules. Psychological Bulletin, 81(6), 358-361.
    class BartlettSphericity
      include Statsample::Test
      include Summarizable
      attr_accessor :name
      attr_reader :ncases, :nvars, :value, :df
      # Args
      # * _matrix_: correlation matrix
      # * _ncases_: number of cases
      #
      # The statistic and its degrees of freedom are computed on creation.
      def initialize(matrix,ncases)
        @matrix = matrix
        @ncases = ncases
        @nvars = @matrix.row_size
        @name = _("Bartlett's test of sphericity")
        compute
      end
      # Uses SPSS formula.
      # On Dziuban & Shirkey, the minus between the first and second
      # statement is a *!!!
      #
      # Sets @value to -((ncases-1) - (2*nvars+5)/6) * ln(det(matrix))
      # and @df to nvars*(nvars-1)/2.
      def compute
        correction = (2 * @nvars + 5).quo(6)
        log_determinant = Math::log(@matrix.determinant)
        @value = -((@ncases - 1) - correction) * log_determinant
        @df = (@nvars * (@nvars - 1)).quo(2)
      end
      # 1 minus the chi-square cdf at @value, with @df degrees of freedom.
      def probability
        1 - Distribution::ChiSquare.cdf(@value, @df)
      end
      def report_building(builder) # :nodoc:
        fields = [@name, @df, @value, probability]
        builder.text("%s : X(%d) = %0.4f , p = %0.4f" % fields)
      end

    end
  end
end


================================================
FILE: lib/statsample/test/chisquare.rb
================================================
module Statsample
  module Test
    module ChiSquare
      # Chi-square statistic for a matrix of observed frequencies against a
      # matrix of expected frequencies.
      class WithMatrix
        attr_reader :df, :value
        # * observed: matrix of observed values
        # * expected: matrix of expected values; when nil (or false) it is
        #   derived from the margins of +observed+
        #
        # Raises when both matrices differ in row or column count.
        def initialize(observed, expected=nil)
          @observed = observed
          @expected = expected
          calculate_expected unless @expected
          if @observed.row_size != @expected.row_size || @observed.column_size != @expected.column_size
            raise "Observed size!=expected size"
          end
          @df = (@observed.row_size - 1) * (@observed.column_size - 1)
          @value = compute_chi
        end
        # Stores in @expected, and returns, the matrix whose cell (i,j) is
        # (row_sum[i]/total * column_sum[j]/total) * total.
        def calculate_expected
          grand_total = @observed.total_sum
          rows = (0...@observed.row_size).map do |i|
            (0...@observed.column_size).map do |j|
              (@observed.row_sum[i].quo(grand_total) * @observed.column_sum[j].quo(grand_total)) * grand_total
            end
          end
          @expected = Matrix.rows(rows)
        end
        # Value of the statistic.
        def to_f
          @value
        end
        # Value of the statistic.
        def chi_square
          @value
        end
        # 1 minus the chi-square cdf at the statistic, with @df degrees of
        # freedom.
        def probability
          1 - Distribution::ChiSquare.cdf(@value.to_f, @df)
        end
        # Sum over all cells, in row-major order, of
        # (observed-expected)**2 / expected.
        def compute_chi
          (0...@observed.row_size).inject(0) do |total, i|
            # The running total is threaded through the inner fold so terms
            # are added one by one in row-major order.
            (0...@observed.column_size).inject(total) do |acc, j|
              acc + ((@observed[i, j] - @expected[i, j])**2).quo(@expected[i, j])
            end
          end
        end
      end
    end
  end
end

================================================
FILE: lib/statsample/test/f.rb
================================================
module Statsample
  module Test
    # From Wikipedia:
    # An F-test is any statistical test in which the test statistic has an F-distribution under the null hypothesis. It is most often used when comparing statistical models that have been fit to a data set, in order to identify the model that best fits the population from which the data were sampled.
    class F
      include Statsample::Test
      include Summarizable
      attr_reader :var_num, :var_den, :df_num, :df_den, :var_total, :df_total
      # Tails for probability (:left or :right; :both is rejected by the
      # constructor)
      attr_accessor :tails
      # Name of F analysis
      attr_accessor :name

      # Parameters:
      # * var_num: variance numerator
      # * var_den: variance denominator
      # * df_num: degrees of freedom numerator
      # * df_den: degrees of freedom denominator
      # * opts: :tails (default :right) and :name
      #
      # Raises if :tails is :both.
      def initialize(var_num, var_den, df_num, df_den, opts=Hash.new)
        @var_num, @var_den = var_num, var_den
        @df_num, @df_den = df_num, df_den
        @var_total = var_num + var_den
        @df_total = df_num + df_den
        defaults = {:tails=>:right, :name=>_("F Test")}
        @opts = defaults.merge(opts)
        raise "Tails should be right or left, not both" if @opts[:tails]==:both
        # Only the two documented options are applied through their writers.
        self.tails = @opts[:tails]
        self.name = @opts[:name]
      end
      # The F ratio: variance numerator over variance denominator.
      def f
        @var_num.quo(@var_den)
      end
      # Same value as #f.
      def to_f
        f
      end
      # Probability of the F ratio, from the F cdf with (df_num, df_den)
      # degrees of freedom and the configured tail.
      def probability
        cdf = Distribution::F.cdf(f, @df_num, @df_den)
        p_using_cdf(cdf, tails)
      end
      def report_building(builder) #:nodoc:
        # Integer degrees of freedom print as integers, anything else with
        # two decimals.
        integer_df = @df_num.is_a?(Integer) && @df_den.is_a?(Integer)
        template =
          if integer_df
            "%s : F(%d, %d) = %0.4f , p = %0.4f"
          else
            "%s : F(%0.2f, %0.2f) = %0.4f , p = %0.4f"
          end
        builder.text(template % [@name, @df_num, @df_den, f, probability])
      end
    end
  end
end


================================================
FILE: lib/statsample/test/kolmogorovsmirnov.rb
================================================
module Statsample
  module Test
    # == Kolmogorov-Smirnov's test of equality of distributions.
    class KolmogorovSmirnov
      
      attr_reader :d
      include Statsample::Test
      include Summarizable
      # Creates a new Kolmogorov-Smirnov test
      # d1 should have each method
      # d2 could be a Distribution class, with a cdf method,
      # a vector or a lambda
      def initialize(d1,d2)
        unless d1.respond_to?(:each)
          raise "First argument should have each method"
        end
        @d1 = make_cdf(d1)
        # A distribution (anything with #cdf) or a Proc is used as given;
        # any other enumerable is wrapped in an empirical distribution.
        @d2 =
          if d2.respond_to?(:cdf) || d2.is_a?(Proc)
            d2
          elsif d2.respond_to?(:each)
            make_cdf(d2)
          else
            raise "Second argument should respond to cdf or each"
          end
        calculate
      end
      # Stores in @d the largest absolute difference between both
      # distribution functions, evaluated at every value of the first sample.
      def calculate
        largest = 0
        @d1.each do |x|
          own = @d1.cdf(x)
          other = @d2.is_a?(Proc) ? @d2.call(x) : @d2.cdf(x)
          gap = own - other
          largest = gap.to_f.abs if gap.abs > largest
        end
        @d = largest
      end
      # Wraps +v+ in an EmpiricDistribution.
      # For a Statsample::Vector only its #valid_data is used; any other
      # object is passed as is.
      def make_cdf(v)
        sample = v.is_a?(Statsample::Vector) ? v.valid_data : v
        EmpiricDistribution.new(sample)
      end
      # Empirical distribution of a sample: iterates its values in
      # ascending order and evaluates the empirical distribution function.
      class EmpiricDistribution
        # +data+ must respond to min, max, sort and size.
        def initialize(data)
          @min=data.min
          @max=data.max
          @data=data.sort
          @n=data.size
        end
        # Yields every observation, in ascending order.
        def each
          @data.each {|x|
            yield x
          }
        end
        # Proportion of observations lower than or equal to +x+.
        # Returns 0 below the minimum and 1 from the maximum upwards.
        def cdf(x)
          return 0 if x<@min
          return 1 if x>=@max
          # @data is sorted and min <= x < max here, so the position of the
          # first observation greater than x is the count of observations
          # <= x. Counting this way keeps tied observations: the earlier
          # lookup of the first value >= x credited a run of ties as one.
          (@data.index {|v1| v1>x}).quo(@n)
        end
      end # End EmpiricDistribution
    end
  end
end


================================================
FILE: lib/statsample/test/levene.rb
================================================
module Statsample
  module Test
    # = Levene Test for Equality of Variances
    # From NIST/SEMATECH:
    # <blockquote>Levene's test ( Levene, 1960) is used to test if k samples have equal variances. Equal variances across samples is called homogeneity of variance. Some statistical tests, for example the analysis of variance, assume that variances are equal across groups or samples. The Levene test can be used to verify that assumption.</blockquote>
    # Use:
    #   require 'statsample'
    #   a=[1,2,3,4,5,6,7,8,100,10].to_scale
    #   b=[30,40,50,60,70,80,90,100,110,120].to_scale
    # 
    #   levene=Statsample::Test::Levene.new([a,b])
    #   puts levene.summary
    #   
    # Output:
    #   Levene Test
    #   F: 0.778121319848449
    #   p: 0.389344552595791
    #
    # Reference:
    # * NIST/SEMATECH e-Handbook of Statistical Methods. Available on http://www.itl.nist.gov/div898/handbook/eda/section3/eda35a.htm
    class Levene
      include Statsample::Test
      include Summarizable
      # Degrees of freedom 1 (k-1)
      attr_reader :d1
      # Degrees of freedom 2 (n-k)
      attr_reader :d2
      # Name of test
      attr_accessor :name
      # Input could be an array of vectors or a dataset.
      # Each option in +opts+ is assigned through its writer when the object
      # responds to the option name.
      def initialize(input, opts=Hash.new())
        @vectors = input.is_a?(Statsample::Dataset) ? input.vectors.values : input
        @name = _("Levene Test")
        opts.each do |key, value|
          send("#{key}=", value) if respond_to?(key)
        end
        compute
      end
      # Value of the test
      def f
        @w
      end
      def report_building(builder) # :nodoc:
        fields = [@name, @d1, @d2, f, probability]
        builder.text("%s : F(%d, %d) = %0.4f , p = %0.4f" % fields)
      end
      # Computes the statistic (@w) and both degrees of freedom.
      def compute
        n = @vectors.inject(0) { |count, vector| count + vector.n_valid }
        k = @vectors.size

        # Absolute deviation of every value from the mean of its own group.
        deviations = @vectors.collect do |vector|
          center = vector.mean
          vector.collect { |value| (value - center).abs }.to_scale
        end

        # Mean of all deviations, pooled across groups.
        pooled = deviations.inject([]) { |all, z| all + z.valid_data }
        grand_mean = pooled.to_scale.mean

        between = deviations.inject(0) do |sum, z|
          sum + (z.size * (z.mean - grand_mean)**2)
        end

        within = deviations.inject(0) do |sum, z|
          group_mean = z.mean
          sum + z.valid_data.inject(0) { |acc, zij| acc + (zij - group_mean)**2 }
        end

        @w = ((n - k) * between).quo((k - 1) * within)
        @d1 = k - 1
        @d2 = n - k
      end
      private :compute
      # Probability.
      # With H_0 = Sum(s2)=0, probability of getting a value of the test upper or equal to the obtained on the sample
      def probability
        cdf = Distribution::F.cdf(f, @d1, @d2)
        p_using_cdf(cdf, :right)
      end

    end
  end
end


================================================
FILE: lib/statsample/test/t.rb
================================================
module Statsample
  module Test
    
    
    # A t-test is any statistical hypothesis test in which the test statistic follows a Student's t distribution, if the null hypothesis is supported
    class T
      
      class << self
        include Math
        # Statistic to test the null hypothesis that the population mean is
        # equal to a specified value u.
        # Is the same formula used on t-test for paired sample.
        # * <tt>x</tt>: sample/differences mean
        # * <tt>u</tt>: population mean
        # * <tt>s</tt>: sample/differences standard deviation
        # * <tt>n</tt>: sample size
        def one_sample(x,u,s,n)
          scale = Math.sqrt(n).quo(s)
          (x - u) * scale
        end
        # Test if means of two samples are different.
        # * <tt>x1</tt>: sample 1 mean
        # * <tt>x2</tt>: sample 2 mean
        # * <tt>s1</tt>: sample 1 standard deviation
        # * <tt>s2</tt>: sample 2 standard deviation
        # * <tt>n1</tt>: sample 1 size
        # * <tt>n2</tt>: sample 2 size
        # * <tt>equal_variance</tt>: true if equal_variance assumed
        #
        def two_sample_independent(x1, x2, s1, s2, n1, n2, equal_variance = false)
          difference = x1 - x2
          standard_error =
            if equal_variance
              # Pooled standard deviation of both samples.
              pooled_sd = Math.sqrt(((n1 - 1) * s1**2 + (n2 - 1) * s2**2).quo(n1 + n2 - 2))
              pooled_sd * Math.sqrt(1.quo(n1) + 1.quo(n2))
            else
              Math.sqrt((s1**2).quo(n1) + (s2**2).quo(n2))
            end
          difference.quo(standard_error)
        end
        # Degrees of freedom for equal variance on t test
        def df_equal_variance(n1,n2)
          n1 + n2 - 2
        end
        # Degrees of freedom for unequal variance
        # * <tt>s1</tt>: sample 1 standard deviation
        # * <tt>s2</tt>: sample 2 standard deviation
        # * <tt>n1</tt>: sample 1 size
        # * <tt>n2</tt>: sample 2 size
        # == Reference
        # * http://en.wikipedia.org/wiki/Welch-Satterthwaite_equation
        def df_not_equal_variance(s1,s2,n1,n2)
          term1 = (s1**2).quo(n1)
          term2 = (s2**2).quo(n2)
          numerator = (term1 + term2)**2
          denominator = (term1**2).quo(n1 - 1) + (term2**2).quo(n2 - 1)
          numerator.quo(denominator)
        end
      end
      
      include Statsample::Test
      include Summarizable
      attr_reader :standard_error, :estimate, :df
      # Tails for p-value (:both, :left or :right). Default :both
      attr_accessor :tails
      # Name of the t analysis
      attr_accessor :name
      attr_accessor :confidence_level
      attr_reader :t
      attr_accessor :estimate_name, :standard_error_name
      # Creates a generic t test. Use OneSample or TwoSamplesIndependent
      # classes for better summaries.
      # Parameters:
      # * estimate: estimate
      # * standard_error: standard error of estimate
      # * df: degrees of freedom
      def initialize(estimate, standard_error, df, opts=Hash.new)
        @estimate, @standard_error, @df = estimate, standard_error, df
        # The standard error is converted to Float before dividing.
        @t = @estimate / @standard_error.to_f
        defaults = {
          :tails=>:both,
          :name=>_("T Test"),
          :estimate_name=>_("Estimate"),
          :standard_error_name=>_("Std.Err.of Estimate"),
          :confidence_level=>0.95
        }
        @opts = defaults.merge(opts)

        # Every option whose name the object responds to is assigned through
        # its writer; the others are only kept in @opts.
        @opts.each do |key, value|
          send("#{key}=", value) if respond_to?(key)
        end
      end
      
      alias :se :standard_error
      
      # Returns the t statistic (same value as #t).
      def to_f
        t
      end
      
      # Probability of the t statistic: the cdf of the T distribution at
      # +t+ with +df+ degrees of freedom, handed to p_using_cdf together
      # with the configured +tails+.
      def probability
        p_using_cdf(Distribution::T.cdf(t, df),  tails)
      end
      
      # Confidence interval for the estimate, as [lower, upper].
      # +cl+ is the confidence level; it defaults to #confidence_level.
      def confidence_interval(cl=nil)
        level = cl || confidence_level
        half_width = se * t_critical(level, df)
        [estimate - half_width, estimate + half_width]
      end
      alias :ci :confidence_interval
      
      
      def report_building(builder) #:nodoc:
        builder.section(:name=>@name) { |section|
          fields = [@estimate_name, @estimate, @standard_error_name, se]
          section.text(_("%s: %0.4f | %s: %0.4f") % fields)
          report_building_t(section)
        }
      end
      # Writes two lines on +s+: the t statistic with its degrees of
      # freedom, probability and tails, and the confidence interval at
      # +confidence_level+.
      def report_building_t(s)
        # Integer degrees of freedom print as integers; fractional ones
        # print with four decimals. The chosen format used to be computed
        # and then ignored, so fractional df went through "%d" and lost its
        # decimals. For integer df the message is still exactly
        # "t(%d) = %0.4f, p=%0.4f (%s tails)", so its translation is kept.
        df_f=@df.is_a?(Integer) ? "%d" : "%0.4f"
        s.text _("t(#{df_f}) = %0.4f, p=%0.4f (%s tails)") % [df, t,probability, tails]
        lower, upper = ci
        s.text _("CI(%d%%): %0.4f - %0.4f") % [confidence_level*100, lower, upper]
      end
      
      
      # One Sample t-test
      # == Usage
      #   a=1000.times.map {rand(100)}.to_scale
      #   t_1=Statsample::Test::T::OneSample.new(a, {:u=>50})
      #   t_1.summary
      #
      # === Output
      #
      #  = One Sample T Test
      #  Sample mean: 48.954
      #  Population mean:50
      #  Tails: both
      #  t = -1.1573, p=0.2474, d.f=999

      class OneSample
        include Math
        include Statsample::Test
        include Summarizable
        # Options
        attr_accessor :opts
        # Name of test
        attr_accessor :name
        # Population mean to contrast
        attr_accessor :u
        # Degrees of freedom
        attr_reader :df
        # Tails for probability (:both, :left or :right)
        attr_accessor :tails

        # Create a One Sample T Test
        # Options:
        # * :u = Mean to compare. Default= 0
        # * :name = Name of the analysis
        # * :tails = Tail for probability. Could be :both, :left, :right
        # * :confidence_level = stored in @confidence_level. Default= 0.95
        def initialize(vector, opts=Hash.new)
          @vector = vector
          @opts = {:u=>0, :name=>"One Sample T Test", :tails=>:both}.merge(opts)
          @name, @u, @tails = @opts.values_at(:name, :u, :tails)
          @confidence_level = @opts[:confidence_level] || 0.95
          @df = @vector.n_valid - 1
          @t = nil
        end
        # Builds a generic T test on the difference between the sample mean
        # and +u+, using the vector's standard error and n_valid-1 degrees
        # of freedom. A new object is built on every call.
        def t_object
          difference = @vector.mean - u
          T.new(difference, @vector.se, @vector.n_valid - 1, opts)
        end
        # t statistic, taken from #t_object.
        def t
          t_object.t
        end
        # Probability, taken from #t_object.
        def probability
          t_object.probability
        end
        # Standard error, taken from #t_object.
        def standard_error
          t_object.standard_error
        end
        alias :se :standard_error
        # Confidence interval at level +cl+, taken from #t_object.
        def confidence_interval(cl=nil)
          t_object.confidence_interval(cl)
        end
        alias :ci :confidence_interval
        def report_building(b) # :nodoc:
          b.section(:name=>@name) do |s|
            sample_fields = [@vector.mean, @vector.sd, se]
            s.text(_("Sample mean: %0.4f | Sample sd: %0.4f | se : %0.4f") % sample_fields)
            # The population mean is only reported when it is not 0.
            s.text(_("Population mean: %0.4f") % u) if u != 0
            t_object.report_building_t(s)
          end
        end
      end
      # Two Sample t-test.
      #
      # == Usage
      #   a=1000.times.map {rand(100)}.to_scale
      #   b=1000.times.map {rand(100)}.to_scale
      #   t_2=Statsample::Test::T::TwoSamplesIndependent.new(a,b)
      #   t_2.summary
      # === Output
      #  = Two Sample T Test
      #  Mean and standard deviation
      #  +----------+---------+---------+------+
      #  | Variable |    m    |   sd    |  n   |
      #  +----------+---------+---------+------+
      #  | 1        | 49.3310 | 29.3042 | 1000 |
      #  | 2        | 47.8180 | 28.8640 | 1000 |
      #  +----------+---------+---------+------+
      #  
      #  == Levene Test
      #   Levene Test
      #   F: 0.3596
      #   p: 0.5488
      #   T statistics
      #   +--------------------+--------+-----------+----------------+
      #   |        Type        |   t    |    df     | p (both tails) |
      #   +--------------------+--------+-----------+----------------+
      #   | Equal variance     | 1.1632 | 1998      | 0.2449         |
      #   | Non equal variance | 1.1632 | 1997.5424 | 0.1362         |
      #   +--------------------+--------+-----------+----------------+

      class TwoSamplesIndependent
        include Math
        include Statsample::Test

        include DirtyMemoize
        include Summarizable
        # Options
        attr_accessor :opts
        # Name of test
        attr_accessor :name
        # Degrees of freedom (equal variance)
        attr_reader :df_equal_variance
        # Degrees of freedom (not equal variance)
        attr_reader :df_not_equal_variance
        # Value of t for equal_variance
        attr_reader :t_equal_variance
        # Value of t for non-equal_variance
        attr_reader :t_not_equal_variance
        # Probability(equal variance)
        attr_reader :probability_equal_variance
        # Probability(unequal variance)
        attr_reader :probability_not_equal_variance
        # Tails for probability (:both, :left or :right)
        attr_accessor :tails

        # NOTE(review): dirty_writer/dirty_memoize come from DirtyMemoize;
        # presumably writing :tails marks the object dirty so the memoized
        # readers below trigger #compute again - confirm against DirtyMemoize.
        dirty_writer :tails
        dirty_memoize :t_equal_variance, :t_not_equal_variance, :probability_equal_variance, :probability_not_equal_variance, :df_equal_variance, :df_not_equal_variance

        # Create a Two Independent T Test
        # Options:
        # * :name = Name of the analysis
        # * :tails = Tail for probability. Could be :both, :left, :right
        def initialize(v1, v2, opts=Hash.new)
          @v1=v1
          @v2=v2
          default={:u=>0, :name=>"Two Sample T Test",  :tails=>:both}
          @opts=default.merge(opts)
          @name=@opts[:name]
          @tails=@opts[:tails]
        end

        # Computes t, degrees of freedom and probability for both the
        # equal variance and the non equal variance cases.
        def compute
          @t_equal_variance= T.two_sample_independent(@v1.mean, @v2.mean, @v1.sd, @v2.sd, @v1.n_valid, @v2.n_valid,true)

          @t_not_equal_variance= T.two_sample_independent(@v1.mean, @v2.mean, @v1.sd, @v2.sd, @v1.n_valid, @v2.n_valid, false)

          @df_equal_variance=T.df_equal_variance(@v1.n_valid, @v2.n_valid)
          @df_not_equal_variance=T.df_not_equal_variance(@v1.sd, @v2.sd, @v1.n_valid, @v2.n_valid)

          @probability_equal_variance = p_using_cdf(Distribution::T.cdf(@t_equal_variance, @df_equal_variance), tails)

          @probability_not_equal_variance = p_using_cdf(Distribution::T.cdf(@t_not_equal_variance, @df_not_equal_variance), tails)

        end

        # Cohen's d is a measure of effect size. It is defined as the
        # difference between the two means divided by the pooled
        # standard deviation:
        #
        #   s = sqrt( ((n1-1)*s1^2 + (n2-1)*s2^2) / (n1+n2-2) )
        #
        # The pooling is done on variances (squared standard
        # deviations), not on the standard deviations themselves.
        def d
          n1=@v1.n_valid
          n2=@v2.n_valid
          pooled_variance=((n1-1)*@v1.sd**2+(n2-1)*@v2.sd**2).quo(n1+n2-2)
          (@v1.mean-@v2.mean).quo(Math::sqrt(pooled_variance))
        end

        def report_building(b) # :nodoc:
          b.section(:name=>@name) {|g|
            g.table(:name=>_("Mean and standard deviation"), :header=>[_("Variable"), _("mean"), _("sd"),_("n")]) {|t|
              t.row([@v1.name,"%0.4f" % @v1.mean,"%0.4f" % @v1.sd,@v1.n_valid])
              t.row([@v2.name,"%0.4f" % @v2.mean,"%0.4f" % @v2.sd, @v2.n_valid])
            }
            g.parse_element(Statsample::Test.levene([@v1,@v2],:name=>_("Levene test for equality of variances")))

            g.table(:name=>_("T statistics"),:header=>["Type","t","df", "p (#{tails} tails)"].map{|v| _(v)}) {|t|
              t.row([_("Equal variance"), "%0.4f" % t_equal_variance, df_equal_variance, "%0.4f" % probability_equal_variance])
              t.row([_("Non equal variance"), "%0.4f" % t_not_equal_variance, "%0.4f" % df_not_equal_variance, "%0.4f" % probability_not_equal_variance])
            }
            g.table(:name=>_("Effect size")) do |t|
              t.row ['x1-x2', "%0.4f" % (@v1.mean-@v2.mean)]
              t.row ['d', "%0.4f" % d]
            end
          }
        end
      end
    end
  end
end


================================================
FILE: lib/statsample/test/umannwhitney.rb
================================================
module Statsample
  module Test
    #
    # = U Mann-Whitney test
    #
    # Non-parametric test for assessing whether two independent samples
    # of observations come from the same distribution.
    # 
    # == Assumptions
    #
    # * The two samples under investigation in the test are independent of each other and the observations within each sample are independent.
    # * The observations are comparable (i.e., for any two observations, one can assess whether they are equal or, if not, which one is greater).
    # * The variances in the two groups are approximately equal.
    #
    # Larger differences between the two distributions correspond
    # to lower values of U.
    #
    class UMannWhitney
      # Max for m*n allowed for exact calculation of probability
      MAX_MN_EXACT=10000

      # U sampling distribution, based on Dinneen & Blakesley (1973) algorithm.
      # This is the algorithm used on SPSS.
      #
      # Parameters:
      # * <tt>n1</tt>: group 1 size
      # * <tt>n2</tt>: group 2 size
      #
      # Returns an array indexed by U (from 0 to n1*n2/2) with the
      # proportion of arrangements whose smaller U equals that index.
      # == Reference:
      # * Dinneen, L., & Blakesley, B. (1973). Algorithm AS 62: A Generator for the Sampling Distribution of the Mann- Whitney U Statistic. <em>Journal of the Royal Statistical Society, 22</em>(2), 269-273
      #
      def self.u_sampling_distribution_as62(n1,n2)
        freq=[]
        work=[]
        max_u=n1*n2
        mn1=max_u+1
        minmn=[n1,n2].min
        maxmn=[n1,n2].max
        # freq and work are filled from index 1, as in the published routine
        top=maxmn+1
        (1..top).each {|i| freq[i]=1}
        top+=1
        (top..mn1).each {|i| freq[i]=0}
        work[1]=0
        xin=maxmn
        (2..minmn).each do |i|
          work[i]=0
          xin+=maxmn
          top=xin+2
          l=1+xin.quo(2)
          k=i
          (1..l).each do |j|
            k+=1
            top-=1
            sum=freq[j]+work[j]
            freq[j]=sum
            work[k]=sum-freq[top]
            freq[top]=sum
          end
        end

        # Generate percentages for normal U
        dist=(1+max_u/2).to_i
        # Drop the unused slot 0 so that index i corresponds to U=i
        freq.shift
        total=freq.inject(0) {|acc,count| acc+count }
        (0...dist).collect do |i|
          # U and max_u-U fold onto the same value, except at the center
          weight=(i==max_u-i) ? 1 : 2
          (freq[i]*weight).quo(total)
        end
      end

      # Generate distribution for permutations.
      # Very expensive, but useful for demonstrations.
      #
      # Returns a hash which maps every permutation of n1 zeros and
      # n2 ones to the smaller of its two U values.
      def self.distribution_permutations(n1,n2)
        permutations=Statsample::Permutation.new([0]*n1+[1]*n2)
        total=n1*n2
        u_by_permutation={}
        permutations.each do |perm|
          rank_sum,zeros=0,0
          perm.each_with_index do |value,position|
            next unless value==0
            rank_sum+=position+1
            zeros+=1
          end
          u1=rank_sum-((zeros*(zeros+1)).quo(2))
          u2=total-u1
          u_by_permutation[perm]=(u1 <= u2) ? u1 : u2
        end
        u_by_permutation
      end
      # Sample 1 Rank sum
      attr_reader :r1
      # Sample 2 Rank sum
      attr_reader :r2
      # Sample 1 U (useful for demonstration)
      attr_reader :u1
      # Sample 2 U (useful for demonstration)
      attr_reader :u2
      # U Value
      attr_reader :u
      # Value of compensation for ties (useful for demonstration)
      attr_reader :t
      # Name of test
      attr_accessor :name
      include Summarizable
      #
      # Create a new U Mann-Whitney test
      # Params: Two Statsample::Vectors and an optional hash of
      # options (:name is the only option with a default)
      #
      def initialize(v1,v2, opts=Hash.new)
        @v1=v1
        @v2=v2
        @n1=v1.valid_data.size
        @n2=v2.valid_data.size
        pooled=(v1.valid_data+v2.valid_data).to_scale
        groups=(([0]*@n1)+([1]*@n2)).to_vector
        ds={'g'=>groups, 'data'=>pooled}.to_dataset
        @t=nil
        raw=pooled.data
        @ties=raw.size!=raw.uniq.size
        adjust_for_ties(ds['data']) if @ties
        ds['ranked']=ds['data'].ranked(:scale)

        total=ds.cases
        @n=total

        @r1=ds.filter{|row| row['g']==0}['ranked'].sum
        @r2=((total*(total+1)).quo(2))-r1
        @u1=r1-((@n1*(@n1+1)).quo(2))
        @u2=r2-((@n2*(@n2+1)).quo(2))
        @u=(u1<u2) ? u1 : u2
        opts_default={:name=>_("Mann-Whitney's U")}
        @opts=opts_default.merge(opts)
        # Only the options which have a default are assigned
        opts_default.each_key do |key|
          send("#{key}=", @opts[key])
        end

      end
      def report_building(generator) # :nodoc:
        generator.section(:name=>@name) do |s|
          s.table(:name=>_("%s results") % @name) do |t|
            t.row([_("Sum of ranks %s") % @v1.name, "%0.3f" % @r1])
            t.row([_("Sum of ranks %s") % @v2.name, "%0.3f" % @r2])
            t.row([_("U Value"), "%0.3f" % @u])
            t.row([_("Z"), "%0.3f (p: %0.3f)" % [z, probability_z]])
            # The exact probability is reported only below MAX_MN_EXACT
            if @n1*@n2<MAX_MN_EXACT
              t.row([_("Exact p (Dinneen & Blakesley, 1973):"), "%0.3f" % probability_exact])
            end
          end
        end
      end
      # Exact probability of finding values of U lower or equal to sample on U distribution.
      # Use with caution for large m*n (the report only shows it when
      # m*n is below MAX_MN_EXACT).
      # Uses u_sampling_distribution_as62
      def probability_exact
        dist=UMannWhitney.u_sampling_distribution_as62(@n1,@n2)
        (0..@u.to_i).inject(0) {|acc,i| acc+dist[i] }
      end
      # Adjust for ties: stores on @t the sum of (t**3-t)/12 over
      # every value which appears more than once.
      #
      # == Reference:
      # * http://europe.isixsigma.com/library/content/c080806a.asp
      def adjust_for_ties(data)
        @t=data.frequencies.inject(0) do |acc,(_value,count)|
          count>1 ? acc+(count**3-count).quo(12) : acc
        end
      end

      private :adjust_for_ties

      # Z value for U, with adjust for ties.
      # For large samples, U is approximately normally distributed.
      # In that case, you can use z to obtain probability for U.
      # == Reference:
      # * SPSS Manual
      def z
        mu=(@n1*@n2).quo(2)
        sigma=if @ties
          n=@n1+@n2
          first=(@n1*@n2).quo(n*(n-1))
          second=((n**3-n).quo(12))-@t
          Math::sqrt(first*second)
        else
          Math::sqrt(((@n1*@n2)*(@n1+@n2+1)).quo(12))
        end
        (@u-mu).quo(sigma)
      end
      # Assuming H_0, the two tailed probability of a Z at least as
      # extreme as the sample one, using normal approximation.
      # Use with more than 30 cases per group.
      def probability_z
        upper_tail=1-Distribution::Normal.cdf(z.abs())
        upper_tail*2
      end
    end
      
  end
end

================================================
FILE: lib/statsample/test/wilcoxonsignedrank.rb
================================================
module Statsample
  module Test
    # From Wikipedia:
    # The Wilcoxon signed-rank test is a non-parametric statistical hypothesis test used when comparing two related samples, matched samples, or repeated measurements on a single sample to assess whether their population mean ranks differ (i.e. it is a paired difference test). It can be used as an alternative to the paired Student's t-test, t-test for matched pairs, or the t-test for dependent samples when the population cannot be assumed to be normally distributed.
    class WilcoxonSignedRank
      include Statsample::Test
      include Summarizable

      # Name of test
      attr_accessor :name
      # Signed rank sum (W)
      attr_reader :w
      # Number of pairs with a non-zero difference
      attr_reader :nr
      # Tails for probability (:both, :left or :right)
      attr_accessor :tails
      # Parameters:
      # * <tt>v1</tt>, <tt>v2</tt>: paired vectors
      # * <tt>opts</tt>: :name (name of the analysis) and :tails
      #   (:both by default)
      def initialize(v1,v2, opts=Hash.new)
        @v1=v1
        @v2=v2
        opts_default={:name=>_("Wilcoxon Signed Rank Test"),:tails=>:both}
        @opts=opts_default.merge(opts)
        opts_default.keys.each {|k|
          send("#{k}=", @opts[k])
        }
        calculate
      end
      # Computes W and nr: ranks the absolute differences v2-v1 of the
      # pairs with a non-zero difference and sums the ranks multiplied
      # by the sign of each difference.
      def calculate
        df=Statsample::Dataset.new({'v1'=>@v1,'v2'=>@v2})
        df["abs"]=df.collect {|row|
          (row["v2"]-row["v1"]).abs
        }
        df["sgn"]=df.collect {|row|
          r=row["v2"]-row["v1"]
          r==0 ? 0 : r/r.abs
        }
        # Pairs without difference are discarded
        df=df.filter {|row| row["sgn"]!=0}
        df["rank"]=df["abs"].ranked
        @nr=df.cases
        @w=df.collect {|row|
          row["sgn"]*row["rank"]
        }.sum
      end
      def report_building(generator) # :nodoc:
        generator.section(:name=>@name) do |s|
          s.table(:name=>_("%s results") % @name) do |t|
            t.row([_("W Value"), "%0.3f" % @w])
            t.row([_("Z"), "%0.3f (p: %0.3f)" % [z, probability_z]])
            if(nr<=10)
              t.row([_("Exact probability"), "p-exact: %0.3f" % [probability_exact]])
            end
          end
        end
      end
      # Z value for W, using the normal approximation with a
      # continuity correction of 0.5 applied towards zero, so that
      # W and -W produce Z values of equal size and opposite sign.
      # Returns 0.0 when there are no non-zero differences.
      def z
        sigma=Math.sqrt((nr*(nr+1)*(2*nr+1))/6)
        return 0.0 if sigma==0
        corrected=w.abs-0.5
        corrected=0.0 if corrected<0
        (w<0 ? -corrected : corrected)/sigma
      end
      # Assuming normal distribution of W, this calculates the
      # probability of a Z
      # * at least as extreme as the sample one, on either side (:both)
      # * equal or lower than the sample one (:left)
      # * equal or higher than the sample one (any other value)
      def probability_z
        case @tails
          when :both then 2*(1-Distribution::Normal.cdf(z.abs))
          when :left then Distribution::Normal.cdf(z)
          else 1-Distribution::Normal.cdf(z)
        end
      end
      # Calculate exact probability, enumerating the 2**nr possible
      # sign assignments over the ranks 1..nr.
      # Don't calculate for large Nr, please!
      def probability_exact
        combinations=2**nr
        extreme=(0...combinations).inject(0) {|count,mask|
          # Bit j of mask gives the sign of rank j+1
          w_local=(0...nr).inject(0) {|ac,j|
            ac+(mask[j]==1 ? (j+1) : -(j+1))
          }
          hit=case @tails
            when :both then w_local.abs>=w.abs
            when :left then w_local<=w
            when :right then w_local>=w
            else false
          end
          hit ? count+1 : count
        }
        extreme/(combinations.to_f)
      end
    end
  end
end


================================================
FILE: lib/statsample/test.rb
================================================
module Statsample
  # Module for several statistical tests
  
  module Test
    autoload(:UMannWhitney, 'statsample/test/umannwhitney')
    autoload(:Levene, 'statsample/test/levene')
    autoload(:T, 'statsample/test/t')
    autoload(:F, 'statsample/test/f')
    autoload(:ChiSquare, 'statsample/test/chisquare')
    autoload(:BartlettSphericity, 'statsample/test/bartlettsphericity')
    autoload(:KolmogorovSmirnov, 'statsample/test/kolmogorovsmirnov')
    autoload(:WilcoxonSignedRank, 'statsample/test/wilcoxonsignedrank')


    # Returns probability of getting a value lower or higher
    # than sample, using cdf and number of tails.
    #
    # * <tt>:left</tt> (or <tt>:negative</tt>): For one tail left, return the cdf
    # * <tt>:right</tt> (or <tt>1</tt>, <tt>:positive</tt>): For one tail right, return 1-cdf
    # * <tt>:both</tt> (or <tt>2</tt>, <tt>:two</tt>): For both tails, returns
    #   twice the smaller of cdf and 1-cdf
    #
    # Raises ArgumentError for any other value of <tt>tails</tt>,
    # instead of silently returning nil.
    def p_using_cdf(cdf, tails=:both)
      tails=:both if tails==2 or tails==:two
      tails=:right if tails==1 or tails==:positive
      tails=:left if tails==:negative
      case tails
        when :left then cdf
        when :right then 1-cdf
        when :both
          if cdf>=0.5
            cdf=1-cdf
          end
          2*cdf
        else
          raise ArgumentError, "Unknown tails: #{tails.inspect}"
      end
    end
    # Get critical t to create confidence interval
    def t_critical(confidence_level, df)
      -Distribution::T.p_value((1-confidence_level) / 2.0, df)
    end
    # Get critical z to create confidence interval
    def z_critical(confidence_level)
      -Distribution::Z.p_value((1-confidence_level) / 2.0)
    end

    # Makes the instance methods above callable on the module itself
    extend self
    class << self
      # Calculate chi square for a Vector or a Matrix of observed
      # values. Raises an exception for any other class.
      def chi_square(observed, expected=nil)
        case observed
          when Vector
            ChiSquare::WithVector.new(observed,expected)
          when Matrix
            ChiSquare::WithMatrix.new(observed,expected)
          else
            raise "Not implemented for #{observed.class}"
        end
      end
      # Shorthand for Statsample::Test::UMannWhitney.new
      #
      # * <tt>v1</tt> and <tt>v2</tt> should be Statsample::Vector.
      # * <tt>opts</tt> is forwarded to the constructor
      def u_mannwhitney(v1, v2, opts=Hash.new)
        Statsample::Test::UMannWhitney.new(v1,v2,opts)
      end
      # Shorthand for Statsample::Test::T::OneSample.new
      def t_one_sample(vector, opts=Hash.new)
        Statsample::Test::T::OneSample.new(vector,opts)
      end
      # Shorthand for Statsample::Test::T::TwoSamplesIndependent.new
      def t_two_samples_independent(v1,v2, opts=Hash.new)
        Statsample::Test::T::TwoSamplesIndependent.new(v1,v2,opts)
      end
      # Shorthand for Statsample::Test::WilcoxonSignedRank.new
      def wilcoxon_signed_rank(v1,v2,opts=Hash.new)
        Statsample::Test::WilcoxonSignedRank.new(v1,v2,opts)
      end
      # Shorthand for Statsample::Test::Levene.new
      def levene(input, opts=Hash.new)
        Statsample::Test::Levene.new(input,opts)
      end

    end
  end
end


================================================
FILE: lib/statsample/vector/gsl.rb
================================================
module Statsample
  class Vector
    module GSL_
      # Discards the cached GSL::Vector, so it is rebuilt on next use
      def clear_gsl
        @gsl=nil
      end

      # Clears the GSL cache before delegating to set_valid_data_ruby
      def set_valid_data
        clear_gsl
        set_valid_data_ruby
      end
      def push(v)
        # If data is GSL::Vector, should be converted first to an Array
        if @data.is_a? GSL::Vector
          @data=@data.to_a
        end
        push_ruby(v)
      end

      # GSL::Vector built from @scale_data and cached on @gsl.
      # Returns nil when @scale_data is empty.
      def gsl
        @gsl||=GSL::Vector.alloc(@scale_data) if @scale_data.size>0
      end

      alias :to_gsl :gsl
      # Uses the ruby implementation when there are missing values
      def vector_standarized_compute(m,sd)
        if flawed?
          vector_standarized_compute_ruby(m,sd)
        else
          gsl.collect {|x| (x.to_f - m).quo(sd)}.to_scale
        end
      end

      # Uses the ruby implementation when there are missing values
      def vector_centered_compute(m)
        if flawed?
          vector_centered_compute_ruby(m)
        else
          gsl.collect {|x| (x.to_f - m)}.to_scale
        end
      end
      # Returns a scale Vector for scale data; other types use the
      # ruby implementation
      def sample_with_replacement(sample=1)
        if(@type!=:scale)
          sample_with_replacement_ruby(sample)
        else
          r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
          Statsample::Vector.new(r.sample(gsl, sample).to_a,:scale)
        end
      end

      # Returns a plain Array for scale data; other types use the
      # ruby implementation
      def sample_without_replacement(sample=1)
        if(@type!=:scale)
          sample_without_replacement_ruby(sample)
        else
          r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
          r.choose(gsl, sample).to_a
        end
      end
      def median
        if @type!=:scale
          median_ruby
        else
          sorted=GSL::Vector.alloc(@scale_data.sort)
          GSL::Stats::median_from_sorted_data(sorted)
        end
      end

      # The statistics below require a scale vector and return nil
      # when there is no GSL vector (no scale data).
      def sum
        check_type :scale
        gsl.nil? ? nil : gsl.sum
      end
      def mean
        check_type :scale
        gsl.nil? ? nil : gsl.mean
      end
      # Sample variance around <tt>m</tt> (the vector mean by default).
      # The mean is handed to GSL, like standard_deviation_sample
      # does, so a mean supplied by the caller is honored.
      def variance_sample(m=nil)
        check_type :scale
        m||=mean
        gsl.nil? ? nil : gsl.variance_m(m)
      end

      def standard_deviation_sample(m=nil)
        check_type :scale
        m||=mean
        gsl.nil? ? nil : gsl.sd(m)
      end

      def variance_population(m=nil) # :nodoc:
        check_type :scale
        m||=mean
        gsl.nil? ? nil : gsl.variance_with_fixed_mean(m)
      end
      def standard_deviation_population(m=nil) # :nodoc:
        check_type :scale
        m||=mean
        gsl.nil? ? nil : gsl.sd_with_fixed_mean(m)
      end
      def skew # :nodoc:
        check_type :scale
        gsl.nil? ? nil : gsl.skew
      end
      def kurtosis # :nodoc:
        check_type :scale
        gsl.nil? ? nil : gsl.kurtosis
      end
    end
  end
end


================================================
FILE: lib/statsample/vector.rb
================================================
require 'date'
require 'statsample/vector/gsl'

# Conversion shortcuts for any object accepted as data by
# Statsample::Vector.new. The receiver is passed as the data argument.
module Statsample::VectorShorthands
  # Creates a new Statsample::Vector object, using the receiver as data.
  # Any argument is passed on to Vector.new after the data
  def to_vector(*args)
		Statsample::Vector.new(self,*args)
	end
  # Creates a new Statsample::Vector object of type :scale, using the
  # receiver as data. Any argument is passed on to Vector.new after the type
  def to_scale(*args)
    Statsample::Vector.new(self, :scale, *args)
  end
end

# Every Array gains #to_vector and #to_scale
class Array
  include Statsample::VectorShorthands
end

# GSL::Vector gains the same shortcuts, only when Statsample reports
# that GSL is available
if Statsample.has_gsl?
  module GSL
    class Vector
      include Statsample::VectorShorthands
    end
  end
end
module Statsample


  # Collection of values on one dimension. Works as a column on a Spreadsheet.
  #
  # == Usage
  # The fast way to create a vector uses Array.to_vector or Array.to_scale.
  #
  #  v=[1,2,3,4].to_vector(:scale)
  #  v=[1,2,3,4].to_scale
  #
  class Vector
    include Enumerable
    include Writable
    include Summarizable
    include Statsample::VectorShorthands

    # Level of measurement. Could be :nominal, :ordinal or :scale
    attr_reader :type
    # Original data.
    attr_reader :data
    # Valid data. Equal to data, minus values assigned as missing values
    attr_reader :valid_data
    # Array of values considered as missing. Nil is a missing value, by default
    attr_reader :missing_values
    # Array of values considered as "Today", with date type. "NOW", "TODAY", :NOW and :TODAY are 'today' values, by default
    attr_reader :today_values
    # Array with the values of data which were considered missing
    attr_reader :missing_data
    # Original data, with all missing values replaced by nils
    attr_reader :data_with_nils
    # Date data, with all missing values replaced by nils
    attr_reader :date_data_with_nils
    # Hash of labels for specific values
    attr_accessor :labels
    # Name of vector. Should be used for output by many classes
    attr_accessor :name

    # Creates a new Vector object.
    # * <tt>data</tt> Any data which can be converted on Array
    # * <tt>type</tt> Level of meausurement. See Vector#type
    # * <tt>opts</tt> Hash of options
    #   * <tt>:missing_values</tt>  Array of missing values. See Vector#missing_values
    #   * <tt>:today_values</tt> Array of 'today' values. See Vector#today_values
    #   * <tt>:labels</tt> Labels for data values
    #   * <tt>:name</tt> Name of vector
    def initialize(data=[], type=:nominal, opts=Hash.new)
      @data = data.is_a?(Array) ? data : data.to_a
      @type = type
      @opts = {
        :missing_values=>[],
        :today_values=>['NOW','TODAY', :NOW, :TODAY],
        :labels=>{},
        :name=>nil
      }.merge(opts)
      if @opts[:name].nil?
        # Unnamed vectors receive a sequential default name,
        # counted across all vectors
        @@n_table ||= 0
        @@n_table += 1
        @opts[:name] = "Vector #{@@n_table}"
      end
      @missing_values, @labels, @today_values, @name =
        @opts.values_at(:missing_values, :labels, :today_values, :name)
      @valid_data, @data_with_nils = [], []
      @date_data_with_nils, @missing_data = [], []
      @has_missing_data = nil
      @scale_data = nil
      # Fill the derived arrays, then go through the type writer
      set_valid_data
      self.type = type
    end
    # Create a vector using (almost) any object
    # * Array: flattened
    # * Range: transformed using to_a
    # * Statsample::Vector
    # * Numeric and string values
    def self.[](*args)
      # Arrays are flattened; vectors and ranges are expanded with
      # to_a; anything else is appended as a single value
      values = args.inject([]) do |collected, arg|
        case arg
        when Array
          collected.concat(arg.flatten)
        when Statsample::Vector, Range
          collected.concat(arg.to_a)
        else
          collected << arg
        end
      end
      vector = new(values)
      # Promoted to :scale only if the vector says it can be one
      vector.type = :scale if vector.can_be_scale?
      vector
    end
    # Create a new scale type vector
    # Parameters
    # [n]      Size
    # [val]    Value of each value
    # [&block] If block provided, is used to set the values of vector
    def self.new_scale(n,val=nil, &block)
      # With a block, each value comes from the block called with the
      # index; otherwise every position holds val
      values = block ? n.times.map { |index| block.call(index) } : n.times.map { val }
      vector = values.to_scale
      vector.type = :scale
      vector
    end
    # Creates a duplicate of the Vector.
    # Note: data, missing_values and labels are duplicated, so
    # changes on original vector doesn't propages to copies.
    def dup
      copy_options = {
        :missing_values => @missing_values.dup,
        :labels => @labels.dup,
        :name => @name
      }
      Vector.new(@data.dup, @type, copy_options)
    end
    # Returns an empty duplicate of the vector. Maintains the type,
    # missing values and labels.
    def dup_empty
      copy_options = {
        :missing_values => @missing_values.dup,
        :labels => @labels.dup,
        :name => @name
      }
      Vector.new([], @type, copy_options)
    end

    # check_type is defined once, at load time: when
    # Statsample::STATSAMPLE__ responds to :check_type the check is
    # delegated to it, otherwise the ruby version (_check_type) is used.
    if Statsample::STATSAMPLE__.respond_to?(:check_type)
      # Raises an exception if type of vector is inferior to t type
      def check_type(t)
        Statsample::STATSAMPLE__.check_type(self,t)
      end
    else
      def check_type(t) #:nodoc:
        _check_type(t)
      end
    end


    # Ruby version of the type check. Raises NoMethodError when
    # * t is :scale and the vector is not :scale
    # * t is :ordinal and the vector is :nominal
    # * t is :date, or the vector is of type :date
    def _check_type(t) #:nodoc:
      scale_missing = (t == :scale) && (@type != :scale)
      ordinal_missing = (t == :ordinal) && (@type == :nominal)
      date_involved = (t == :date) || (:date == @type)
      raise NoMethodError if scale_missing || ordinal_missing || date_involved
    end

    # Builds the scale vector of (x - m) / sd, keeping nils in place
    def vector_standarized_compute(m,sd) # :nodoc:
      standardized = @data_with_nils.map do |value|
        value.nil? ? nil : (value.to_f - m).quo(sd)
      end
      standardized.to_vector(:scale)
    end
    # Return a vector usign the standarized values for data
    # with sd with denominator n-1. With variance=0 or mean nil,
    # returns a vector of equal size full of nils
    #
    # * <tt>use_population</tt>: when true the standard deviation is
    #   taken from sdp, otherwise from sds
    def vector_standarized(use_population=false)
      check_type :scale
      m=mean
      sd=use_population ? sdp : sds
      # m is reused here, so the mean is computed only once
      return ([nil]*size).to_scale if m.nil? or sd==0.0
      vector=vector_standarized_compute(m,sd)
      vector.name=_("%s(standarized)")  % @name
      vector
    end
    # Builds the scale vector of (x - m), keeping nils in place
    def vector_centered_compute(m) #:nodoc:
      centered = @data_with_nils.map do |value|
        value.nil? ? nil : value.to_f-m
      end
      centered.to_scale
    end
    # Return a centered vector
    # With a nil mean, returns a vector of equal size full of nils
    def vector_centered
      check_type :scale
      m=mean
      # m is reused here, so the mean is computed only once
      return ([nil]*size).to_scale if m.nil?
      vector=vector_centered_compute(m)
      vector.name=_("%s(centered)") % @name
      vector
    end

    alias_method :standarized, :vector_standarized
    alias_method  :centered, :vector_centered
    # Return a vector with values replaced with the percentiles
    # of each values
    def vector_percentil
      check_type :ordinal
      valid_count = @valid_data.size
      # Each rank is expressed as a percentage of the valid cases
      percentiles = ranked.map do |rank|
        rank.nil? ? nil : (rank.quo(valid_count)*100).to_f
      end
      vector = percentiles.to_vector(@type)
      vector.name = _("%s(percentil)")  % @name
      vector
    end
    # Box-Cox transformation of every non-nil value: log(x) when
    # lambda is 0, (x**lambda - 1) / lambda otherwise.
    # Raises an exception unless the vector is :scale
    def box_cox_transformation(lambda) # :nodoc:
      raise "Should be a scale" unless @type==:scale
      transformed = @data_with_nils.map do |value|
        next nil if value.nil?
        lambda==0 ? Math.log(value) : (value**lambda-1).quo(lambda)
      end
      transformed.to_vector(:scale)
    end

    # Vector equality.
    # Two vector will be the same if their data, missing values, type, labels are equals
    def ==(v2)
      # Only exact Statsample::Vector instances are comparable
      return false unless v2.instance_of? Statsample::Vector
      return false unless @data==v2.data
      return false unless @missing_values==v2.missing_values
      return false unless @type==v2.type
      @labels==v2.labels
    end

    # Marshal hook: serializes data, missing values, labels, type and name
    def _dump(i) # :nodoc:
      state = {
        'data'=>@data,
        'missing_values'=>@missing_values,
        'labels'=>@labels,
        'type'=>@type,
        'name'=>@name
      }
      Marshal.dump(state)
    end

    # Marshal hook: rebuilds a Vector from the hash written by _dump
    def self._load(data) # :nodoc:
      state = Marshal.load(data)
      restored_options = {
        :missing_values=> state['missing_values'],
        :labels=>state['labels'],
        :name=>state['name']
      }
      Vector.new(state['data'], state['type'], restored_options)
    end
    # Returns a new vector, with data modified by block.
    # Equivalent to create a Vector after #collect on data
    def recode(type=nil)
      # The new vector keeps the current type unless one is given
      target_type = type || @type
      @data.map { |value| yield value }.to_vector(target_type)
    end
    # Modifies current vector, with data modified by block.
    # Equivalent to #collect! on @data
    def recode!
      @data.map! { |value| yield value }
      # Derived arrays must follow the new data
      set_valid_data
    end
    # Appends v to the data and refreshes the derived arrays
    def push(v)
      @data << v
      set_valid_data
    end
    # Dichotomize the vector with 0 and 1, based on the lowest value.
    # If the parameter is defined, this value and lower
    # will be 0, and higher values will be 1
    def dichotomize(low=nil)
      # The factors are only needed to find the default cut point
      low||=factors.min
      @data_with_nils.collect{|x|
        if x.nil?
          nil
        elsif x>low
          1
        else
          0
        end
      }.to_scale
    end
    # Iterate on each item.
    # Equivalent to
    #   @data.each{|x| yield x}
    def each
      @data.each do |item|
        yield item
      end
    end

    # Iterate on each item, retrieving index
    def each_index
      positions = 0...@data.size
      positions.each { |position| yield position }
    end
    # Add a value at the end of the vector.
    # If second argument set to false, you should update the Vector usign
    # Vector.set_valid_data at the end of your insertion cycle
    #
    def add(v,update_valid=true)
      @data << v
      return unless update_valid
      set_valid_data
    end
    # Update valid_data, missing_data, data_with_nils and gsl
    # at the end of an insertion.
    #
    # Use after Vector.add(v,false)
    # Usage:
    #   v=Statsample::Vector.new
    #   v.add(2,false)
    #   v.add(4,false)
    #   v.data
    #   => [2,4]
    #   v.valid_data
    #   => []
    #   v.set_valid_data
    #   v.valid_data
    #   => [2,4]
    def set_valid_data
      # Empty the derived arrays in place before refilling them
      [@valid_data, @missing_data, @data_with_nils, @date_data_with_nils].each do |derived|
        derived.clear
      end
      set_valid_data_intern
      set_scale_data if @type==:scale
      set_date_data if @type==:date
    end
    # set_valid_data_intern is defined once, at load time: when
    # Statsample::STATSAMPLE__ responds to :set_valid_data_intern the
    # work is delegated to it, otherwise the ruby version
    # (_set_valid_data_intern) is used.
    if Statsample::STATSAMPLE__.respond_to?(:set_valid_data_intern)
      def set_valid_data_intern #:nodoc:
        Statsample::STATSAMPLE__.set_valid_data_intern(self)
      end
    else
      def set_valid_data_intern #:nodoc:
        _set_valid_data_intern
      end
    end
    # Ruby version: distributes every value of data between valid and
    # missing data, writes data_with_nils (nil on each missing
    # position) and updates the missing data flag
    def _set_valid_data_intern #:nodoc:
      @data.each do |value|
        valid = is_valid?(value)
        (valid ? @valid_data : @missing_data).push(value)
        @data_with_nils.push(valid ? value : nil)
      end
      @has_missing_data = !@missing_data.empty?
    end

    # Retrieves true if data has one o more missing values
    def has_missing_data?
      # Flag written by _set_valid_data_intern; nil until then
      @has_missing_data
    end
    alias :flawed? :has_missing_data?

    # Retrieves label for value x. Retrieves x if
    # no label defined.
    def labeling(x)
      # Always answers a String
      shown = @labels.has_key?(x) ? @labels[x] : x
      shown.to_s
    end
    alias :label :labeling
    # Returns a Vector with data with labels replaced by the label.
    def vector_labeled
      # Values without a label are kept as they are
      relabeled = @data.map do |value|
        @labels.has_key?(value) ? @labels[value] : value
      end
      Vector.new(relabeled, @type)
    end
    # Size of total data
    def size
      # Missing values are counted too
      @data.length
    end
    alias_method :n, :size

    # Retrieves i element of data
    def [](i)
      # Reads from the original data, so missing values are
      # returned as they were stored
      @data[i]
    end
    # Set i element of data.
    # Note: Use set_valid_data if you include missing values
    def []=(i,v)
      # Only the original data is written; the derived arrays are
      # not refreshed here
      @data[i]=v
    end
    # Return true if a value is valid (not nil and not included on missing values)
    def is_valid?(x)
      return false if x.nil?
      !@missing_values.include?(x)
    end
    # Set missing_values.
    # set_valid_data is called after changes
    def missing_values=(vals)
      @missing_values = vals
      # Valid and missing data depend on the list just assigned
      set_valid_data
    end
    # Set data considered as "today" on data vectors
    def today_values=(vals)
      @today_values = vals
      # Derived data is rebuilt after the change
      set_valid_data
    end
    # Set level of measurement.
    def type=(t)
      @type=t
      # Scale and date vectors have their specific data rebuilt
      set_scale_data if(t==:scale)
      set_date_data if (t==:date)
    end
    # Returns the data as an Array. When the data is already an
    # Array, a copy is returned
    def to_a
      @data.is_a?(Array) ? @data.dup : @data.to_a
    end
    alias_method :to_ary, :to_a

    # Vector sum.
    # - If v is a scalar, add this value to all elements
    # - If v is a Array or a Vector, should be of the same size of this vector
    #   every item of this vector will be added to the value of the
    #   item at the same position on the other vector
    def +(v)
    # Delegates to _vector_ari, which handles both scalar and element-wise operands.
    _vector_ari("+",v)
    end
    # Vector subtraction.
    # - If v is a scalar, subtract this value from all elements
    # - If v is an Array or a Vector, it should be of the same
    #   size as this vector;
    #   the item at the same position on the other vector is
    #   subtracted from every item of this vector

    def -(v)
    # Delegates to _vector_ari, which handles both scalar and element-wise operands.
    _vector_ari("-",v)
    end

    # Vector multiplication.
    # - If v is a scalar, every element is multiplied by it
    # - If v is an Array or a Vector of the same size, items are multiplied
    #   position by position
    def *(v)
      _vector_ari("*",v)
    end
    # Reports all values that doesn't comply with a condition.
    # Returns a hash with the index of data and the invalid data.
    def verify
    # Walk @data by position and collect index => value for every datum
    # for which the block returns a falsy result.
    failures = {}
    @data.size.times do |idx|
      datum = @data[idx]
      failures[idx] = datum unless yield(datum)
    end
    failures
    end
    def _vector_ari(method,v) # :nodoc:
    # Shared implementation of the arithmetic operators.
    # * Vector/Array operand: element-wise; a position yields nil when either
    #   side is invalid (Vector) or nil (Array).
    # * Any other operand responding to +method+: applied to every non-nil datum.
    vector_operand = v.is_a?(Vector)
    if vector_operand || v.is_a?(Array)
      unless v.size == @data.size
        raise ArgumentError, "The array/vector parameter (#{v.size}) should be of the same size of the original vector (#{@data.size})"
      end
      results = (0...v.size).collect do |idx|
        usable =
          if vector_operand
            v.is_valid?(v[idx]) && is_valid?(@data[idx])
          else
            !v[idx].nil? && !data[idx].nil?
          end
        usable ? @data[idx].send(method, v[idx]) : nil
      end
      Statsample::Vector.new(results, :scale)
    elsif v.respond_to?(method)
      results = @data.collect { |datum| datum.nil? ? nil : datum.send(method, v) }
      Statsample::Vector.new(results, :scale)
    else
      raise TypeError,"You should pass a scalar or a array/vector"
    end
    end
    # Return an array with the data splitted by a separator.
    #   a=Vector.new(["a,b","c,d","a,b","d"])
    #   a.splitted
    #     =>
    #   [["a","b"],["c","d"],["a","b"],["d"]]
    def splitted(sep=Statsample::SPLIT_TOKEN)
    # nil stays nil, anything that can be split is split on +sep+,
    # and every other value is wrapped in a one-element array.
    @data.collect do |datum|
      next nil if datum.nil?
      datum.respond_to?(:split) ? datum.split(sep) : [datum]
    end
    end
    # Returns a hash of Vectors, defined by the different values
    # defined on the fields
    # Example:
    #
    #  a=Vector.new(["a,b","c,d","a,b"])
    #  a.split_by_separator
    #  =>  {"a"=>#<Statsample::Type::Nominal:0x7f2dbcc09d88
    #        @data=[1, 0, 1]>,
    #       "b"=>#<Statsample::Type::Nominal:0x7f2dbcc09c48
    #        @data=[1, 0, 1]>,
    #       "c"=>#<Statsample::Type::Nominal:0x7f2dbcc09b08
    #        @data=[0, 1, 0]>,
    #       "d"=>#<Statsample::Type::Nominal:0x7f2dbcc099c8
    #        @data=[0, 1, 0]>}
    #
    def split_by_separator(sep=Statsample::SPLIT_TOKEN)
    rows = splitted(sep)
    # Every distinct non-nil token becomes one indicator column.
    levels = rows.flatten.uniq.compact
    columns = {}
    levels.each { |level| columns[level] = [] }
    rows.each do |row|
      levels.each do |level|
        # A nil row (missing case) is nil in every column; otherwise 1/0
        # marks whether the token is present in that row.
        flag = row.nil? ? nil : (row.include?(level) ? 1 : 0)
        columns[level].push(flag)
      end
    end
    vectors = {}
    columns.each { |level, flags| vectors[level] = Vector.new(flags, :nominal) }
    vectors
    end
    # Returns a hash mapping every token found by split_by_separator to the
    # number of cases in which it appears. Missing cases (nil) count as 0.
    def split_by_separator_freq(sep=Statsample::SPLIT_TOKEN)
      split_by_separator(sep).inject({}) {|a,v|
        # Seed the sum with 0: without a seed the first element is used raw,
        # so a leading nil raised NoMethodError and an empty vector gave nil.
        a[v[0]]=v[1].inject(0) {|s,x| s+x.to_i}
        a
      }
    end

    # == Bootstrap
    # Generate +nr+ resamples (with replacement) of size  +s+
    # from vector, computing each estimate from +estimators+
    # over each resample.
    # +estimators+ could be
    # a) Hash with variable names as keys and lambdas as  values
    #   a.bootstrap(:log_s2=>lambda {|v| Math.log(v.variance)},1000)
    # b) Array with names of method to bootstrap
    #   a.bootstrap([:mean, :sd],1000)
    # c) A single method to bootstrap
    #   a.bootstrap(:mean, 1000)
    # If s is nil, is set to vector size by default.
    #
    # Returns a dataset where each vector is an vector
    # of length +nr+ containing the computed resample estimates.
    def bootstrap(estimators, nr, s=nil)
      s ||= n
      lambdas, names, resamples = prepare_bootstrap(estimators)

      # Draw nr resamples and record every estimator's value on each draw.
      nr.times do
        draw = sample_with_replacement(s)
        names.each { |name| resamples[name].push(lambdas[name].call(draw)) }
      end

      # Convert each list of estimates into a scale vector.
      names.each do |name|
        as_scale = resamples[name].to_scale
        as_scale.type = :scale
        resamples[name] = as_scale
      end
      resamples.to_dataset
    end

    # == Jacknife
    # Returns a dataset with jacknife delete-+k+ +estimators+
    # +estimators+ could be:
    # a) Hash with variable names as keys and lambdas as values
    #   a.jacknife(:log_s2=>lambda {|v| Math.log(v.variance)})
    # b) Array with method names to jacknife
    #   a.jacknife([:mean, :sd])
    # c) A single method to jacknife
    #   a.jacknife(:mean)
    # +k+ represent the block size for block jacknife. By default
    # is set to 1, for classic delete-one jacknife.
    #
    # Returns a dataset where each vector is an vector
    # of length +cases+/+k+ containing the computed jacknife estimates.
    #
    # == Reference:
    # * Sawyer, S. (2005). Resampling Data: Using a Statistical Jacknife.
    def jacknife(estimators, k=1)
      raise "n should be divisible by k:#{k}" unless n % k == 0

      blocks = (n / k).to_i
      lambdas, names, pseudo = prepare_bootstrap(estimators)

      # Estimates over the complete vector, computed once per estimator.
      full = {}
      names.each { |name| full[name] = lambdas[name].call(self) }

      blocks.times do |b|
        # Drop the b-th block of k cases and re-estimate on the remainder.
        kept = @data_with_nils.dup
        kept.slice!(b * k, k)
        kept = kept.to_scale
        names.each do |name|
          # Pseudovalue: blocks * full estimate - (blocks - 1) * leave-out estimate
          pseudo[name].push(blocks * full[name] - (blocks - 1) * lambdas[name].call(kept))
        end
      end

      names.each do |name|
        as_scale = pseudo[name].to_scale
        as_scale.type = :scale
        pseudo[name] = as_scale
      end
      pseudo.to_dataset
    end


    # For an array or hash of estimators methods, returns
    # an array with three elements
    # 1.- A hash with estimators names as keys and lambdas as values
    # 2.- An array with estimators names
    # 3.- A Hash with estimators names as keys and empty arrays as values
    def prepare_bootstrap(estimators)
      # A Hash is used as given; a method name (or Array of names) is turned
      # into a Hash of lambdas that call that method on their argument.
      table =
        if estimators.is_a?(Hash)
          estimators
        else
          list = estimators.is_a?(Array) ? estimators : [estimators]
          list.each_with_object({}) do |meth, acc|
            acc[meth] = lambda { |v| v.send(meth) }
          end
        end

      # One empty accumulator per estimator.
      buckets = {}
      table.keys.each { |key| buckets[key] = [] }

      [table, table.keys, buckets]
    end
    private :prepare_bootstrap

    # Returns an random sample of size n, with replacement,
    # only with valid data.
    #
    # In all the trials, every item has the same probability
    # of being selected.
    def sample_with_replacement(sample=1)
      pool_size = @valid_data.size
      picks = []
      # One independent uniform draw from the valid data per requested element.
      (0...sample).each { picks << @valid_data[rand(pool_size)] }
      picks
    end
    # Returns an random sample of size n, without replacement,
    # only with valid data.
    #
    # Every element could only be selected once.
    #
    # A sample of the same size of the vector is the vector itself.

    def sample_without_replacement(sample=1)
      raise ArgumentError, "Sample size couldn't be greater than n" if sample>@valid_data.size
      # Non-positive sizes yield an empty sample, as before.
      return [] unless sample > 0
      # Draw from @valid_data itself. The previous code drew indices in the
      # range of @valid_data but looked them up in @data, so vectors with
      # missing values could return missing entries and never reach their
      # last valid ones.
      @valid_data.sample(sample)
    end
    # Retrieves number of cases which comply condition.
    # If block given, retrieves number of instances where
    # block returns true.
    # If other values given, retrieves the frequency for
    # this value.
    def count(x=false)
    if block_given?
      # Tally the elements of @data for which the block is truthy.
      hits = 0
      @data.each { |item| hits += 1 if yield(item) }
      hits
    else
      # Frequency of x among the valid data, 0 when x never occurs.
      frequencies[x] || 0
    end
    end

    # Returns the database type for the vector, according to its content

    def db_type(dbs='mysql')
    # Classify on the textual form of every datum. +dbs+ is accepted for
    # interface compatibility but is not consulted.
    #
    # any? is used instead of find: find returns the matching element itself,
    # so a literal +false+ datum was treated as "nothing found" and the
    # vector was reported as INTEGER.
    texts = @data.collect { |v| v.to_s }
    if texts.any? { |s| s =~ /\d{2,2}-\d{2,2}-\d{4,4}/ } || texts.any? { |s| s =~ /\d{4,4}-\d{2,2}-\d{2,2}/ }
      "DATE"
    elsif texts.any? { |s| s =~ /[^0-9e.-]/ }
      # any character that cannot be part of a number
      "VARCHAR (255)"
    elsif texts.any? { |s| s =~ /\./ }
      "DOUBLE"
    else
      "INTEGER"
    end
    end
    # Return true if all data is Date, "today" values or nil
    def can_be_date?
    # Look for a String that is neither a "today" marker nor shaped like
    # yyyy-mm-dd (or yyyy/mm/dd).
    # NOTE(review): values that are not nil, Date, Time or String (e.g. Integers)
    # are not treated as offenders by this check.
    offender = @data.find do |v|
      next false if v.nil? || v.is_a?(Date) || v.is_a?(Time)
      next false unless v.is_a?(String)
      !@today_values.include?(v) && !(v =~ /\d{4,4}[-\/]\d{1,2}[-\/]\d{1,2}/)
    end
    offender ? false : true
    end
    # Return true if all data is Numeric or nil
    def can_be_scale?
      # Every datum must be nil, Numeric or a declared missing value.
      # all? replaces the former find-based test: find returns the offending
      # element itself, so a literal +false+ datum was reported as acceptable.
      @data.all? { |v| v.nil? || v.is_a?(Numeric) || @missing_values.include?(v) }
    end

    # One-line description: type, total size and the comma-joined data,
    # with nil entries spelled out as "nil".
    def to_s
      shown = @data.collect { |d| d.nil? ? "nil" : d }
      "Vector(type:%s, n:%d)[%s]" % [@type.to_s, @data.size, shown.join(",")]
    end
    # Ugly name. Really, create a Vector for standard 'matrix' package.
    # <tt>dir</tt> could be :horizontal or :vertical
    def to_matrix(dir=:horizontal)
      # :horizontal gives a single-row matrix, :vertical a single-column one;
      # any other direction returns nil.
      if dir == :horizontal
        Matrix[@data]
      elsif dir == :vertical
        Matrix.columns([@data])
      end
    end
    def inspect
      # Same text as #to_s.
      self.to_s
    end
    # Retrieves uniques values for data.
    def factors
      if @type==:scale
        @scale_data.uniq.sort
      elsif @type==:date
        # @date_data_with_nils keeps nil placeholders for missing dates; drop
        # them before sorting, since nil cannot be compared with a Date.
        @date_data_with_nils.compact.uniq.sort
      else
        @valid_data.uniq.sort
      end
    end
    # Pick the implementation once, when the class body is evaluated: use
    # Statsample::STATSAMPLE__.frequencies when that module provides it,
    # otherwise fall back to the pure-Ruby _frequencies.
    if Statsample::STATSAMPLE__.respond_to?(:frequencies)
      # Returns a hash with the distribution of frequencies for
      # the sample
      def frequencies
        Statsample::STATSAMPLE__.frequencies(@valid_data)
      end
    else
      def frequencies #:nodoc:
        _frequencies
      end
    end


    def _frequencies #:nodoc:
      # Plain Hash (no default value) mapping each valid datum to its count.
      tally = {}
      @valid_data.each do |datum|
        tally[datum] = (tally[datum] || 0) + 1
      end
      tally
    end

    # Returns the most frequent item.
    def mode
      # max_by yields nil when there is no valid data; return nil in that
      # case instead of failing on nil.first.
      top = frequencies.max_by { |_value, times| times }
      top.nil? ? nil : top.first
    end
    # The numbers of item with valid data.
    def n_valid
      # @valid_data holds only the entries accepted by is_valid?.
      @valid_data.size
    end
    # Returns a hash with the distribution of proportions of
    # the sample.
    def proportions
        # Divide every frequency by the number of valid cases.
        shares = {}
        frequencies.each do |value, times|
            shares[value] = times.quo(n_valid)
        end
        shares
    end
    # Proportion of a given value.
    def proportion(v=1)
        # A value that never occurs has frequency 0; previously the missing
        # hash entry (nil) raised NoMethodError on #quo.
        (frequencies[v] || 0).quo(@valid_data.size)
    end
    # Adds a section describing this vector to the report builder +b+.
    # The section always lists n and n valid; further content depends on @type:
    # * :nominal  - factors, mode and a frequency/percentage table
    # * :ordinal  - median
    # * :scale    - median, mean and, when sd is truthy, dispersion and shape
    def report_building(b)
      b.section(:name=>name) do |s|
        s.text _("n :%d") % n
        s.text _("n valid:%d") % n_valid
        if @type==:nominal
          s.text  _("factors:%s") % factors.join(",")
          s.text   _("mode: %s") % mode

          s.table(:name=>_("Distribution")) do |t|
            frequencies.sort.each do |k,v|
              # Show the label when one is registered for the value.
              key=labels.has_key?(k) ? labels[k]:k
              t.row [key, v , ("%0.2f%%" % (v.quo(n_valid)*100))]
            end
          end
        end

        s.text _("median: %s") % median.to_s if(@type==:ordinal or @type==:scale)
        if(@type==:scale)
          s.text _("mean: %0.4f") % mean
          # The remaining statistics are only printed when sd is truthy.
          if sd
            s.text _("std.dev.: %0.4f") % sd
            s.text _("std.err.: %0.4f") % se
            s.text _("skew: %0.4f") % skew
            s.text _("kurtosis: %0.4f") % kurtosis
          end
        end
      end
    end

      # Variance of p, according to population size
      def variance_proportion(n_poblation, v=1)
        # Passes the proportion of +v+, the number of valid cases and the
        # population size to Statsample::proportion_variance_sample.
        Statsample::proportion_variance_sample(self.proportion(v), @valid_data.size, n_poblation)
      end
      # Variance of the total, according to population size
      def variance_total(n_poblation, v=1)
        # Passes the proportion of +v+, the number of valid cases and the
        # population size to Statsample::total_variance_sample.
        Statsample::total_variance_sample(self.proportion(v), @valid_data.size, n_poblation)
      end
      # Confidence interval for the proportion of +v+, delegated to
      # Statsample::proportion_confidence_interval_t with the number of valid
      # cases, the population size and +margin+.
      def proportion_confidence_interval_t(n_poblation,margin=0.95,v=1)
        Statsample::proportion_confidence_interval_t(proportion(v), @valid_data.size, n_poblation, margin)
      end
      # Confidence interval for the proportion of +v+, delegated to
      # Statsample::proportion_confidence_interval_z with the number of valid
      # cases, the population size and +margin+.
      def proportion_confidence_interval_z(n_poblation,margin=0.95,v=1)
        Statsample::proportion_confidence_interval_z(proportion(v), @valid_data.size, n_poblation, margin)
      end

      # Give every *_slow implementation an unsuffixed alias, unless a method
      # with the unsuffixed name is already defined.
      self.instance_methods.find_all{|met| met=~/_slow$/}.each do |met|
          # instance_methods yields Symbols on Ruby >= 1.9 and Symbol has no
          # #gsub, so go through String; sub with an anchored pattern strips
          # only the trailing suffix.
          met_or=met.to_s.sub(/_slow$/,"")
          if !self.method_defined?(met_or)
              alias_method met_or, met
          end
      end

      ######
      ### Ordinal Methods
      ######

      # == Percentil
      # Returns the value of the percentile q
      #
      # Accepts an optional second argument specifying the strategy to interpolate
      # when the requested percentile lies between two data points a and b
      # Valid strategies are:
      # * :midpoint (Default): (a + b) / 2
      # * :linear : a + (b - a) * d where d is the decimal part of the index between a and b.
      # This is the NIST recommended method (http://en.wikipedia.org/wiki/Percentile#NIST_method)
      #
      def percentil(q, strategy = :midpoint)
        check_type :ordinal
        sorted=@valid_data.sort

        case strategy
        when :midpoint
          # No valid data: return nil, as the :linear strategy already does.
          return nil if sorted.empty?
          v = (n_valid * q).quo(100)
          if(v.to_i!=v)
            sorted[v.to_i]
          else
            # v falls exactly between two positions: average the neighbours.
            # Indexes are clamped to the last element so that q=100 returns
            # the maximum instead of reading past the end of the data.
            last = sorted.size - 1
            lower = [(v-0.5).to_i, last].min
            upper = [(v+0.5).to_i, last].min
            (sorted[lower].to_f + sorted[upper]).quo(2)
          end
        when :linear
          index = (q / 100.0) * (n_valid + 1)

          k = index.truncate
          d = index % 1

          if k == 0
            sorted[0]
          elsif k >= sorted.size
            sorted[-1]
          else
            # Interpolate between the two surrounding data points.
            sorted[k - 1] + d * (sorted[k] - sorted[k - 1])
          end
        else
          raise NotImplementedError.new "Unknown strategy #{strategy.to_s}"
        end
      end

      # Returns a ranked vector.
      def ranked(type=:ordinal)
        check_type :ordinal
        # Walk the distinct values in ascending order. Each value receives the
        # mean of the rank positions it occupies (ties share one rank).
        seen = 0
        rank_of = {}
        frequencies.sort.each do |value, times|
          rank_of[value] = (seen + 1 + seen + times).quo(2)
          seen += times
        end
        @data.collect { |datum| rank_of[datum] }.to_vector(type)
      end
      # Return the median (percentil 50)
      def median
        # check_type is defined outside this excerpt; presumably it rejects
        # vectors whose level of measurement is below :ordinal — confirm.
        check_type :ordinal
        percentil(50)
      end
      # Minimum value
      def min
        check_type :ordinal
        # Computed over valid data only.
        @valid_data.min
      end
        # Maximum value
      def max
        check_type :ordinal
        # Computed over valid data only.
        @valid_data.max
      end

    # Builds @date_data_with_nils from @data: Date values are kept, Time values
    # are reduced to their calendar date, yyyy-mm-dd / yyyy/mm/dd strings are
    # parsed, "today" values become Date.today and everything else becomes nil.
    def set_date_data
      @date_data_with_nils = @data.collect do |x|
        case x
        when Date
          x
        when Time
          Date.new(x.year, x.month, x.day)
        else
          parts = x.is_a?(String) ? x.match(/(\d{4,4})[-\/](\d{1,2})[-\/](\d{1,2})/) : nil
          if parts
            Date.new(parts[1].to_i, parts[2].to_i, parts[3].to_i)
          elsif @today_values.include?(x)
            Date.today()
          end
        end
      end
    end

    # Builds @scale_data from @valid_data: Numerics are kept as they are,
    # Strings with an integral value become Integers and anything else is
    # converted with to_f.
    def set_scale_data
      @scale_data = @valid_data.collect do |x|
        next x if x.is_a?(Numeric)
        integral = x.is_a?(String) && x.to_i == x.to_f
        integral ? x.to_i : x.to_f
      end
    end

    private :set_date_data, :set_scale_data

    # The range of the data (max - min)
    def range
      check_type :scale
      highest = @scale_data.max
      lowest = @scale_data.min
      highest - lowest
    end
    # The sum of values for the data
    def sum
      check_type :scale
      # Accumulate left to right, starting from 0.
      total = 0
      @scale_data.each { |x| total = x + total }
      total
    end
    # The arithmetical mean of data
    def mean
      check_type :scale
      # Float division of the sum by the number of valid cases.
      total = sum.to_f
      total.quo(n_valid)
    end
    # Sum of squares for the data around a value.
    # By default, this value is the  mean
    #   ss= sum{(xi-m)^2}
    #
    def sum_of_squares(m=nil)
      check_type :scale
      m ||= mean
      total = 0
      @scale_data.each { |x| total += (x - m).square }
      total
    end
    # Sum of squared deviation
    def sum_of_squared_deviation
      check_type :scale
      # Computational formula: sum(x^2) - (sum(x))^2 / n
      raw_squares = @scale_data.inject(0) { |acc, x| x.square + acc }
      correction = sum.square.quo(n_valid)
      raw_squares - correction
    end

    # Population variance (denominator N)
    def variance_population(m=nil)
      check_type :scale
      m ||= mean
      # Mean of the squares minus the square of the mean.
      raw_squares = 0
      @scale_data.each { |x| raw_squares = x.square + raw_squares }
      raw_squares.quo(n_valid) - m.square
    end


    # Population Standard deviation (denominator N)
    def standard_deviation_population(m=nil)
      check_type :scale
      # Square root of the population variance around +m+.
      variance = variance_population(m)
      Math.sqrt(variance)
    end

    # Population average deviation (denominator N)
    # author: Al Chou

    def average_deviation_population( m = nil )
      check_type :scale
      m ||= mean
      # Mean absolute distance from +m+ over the valid cases.
      absolute_total = 0
      @scale_data.each { |x| absolute_total = (x - m).abs + absolute_total }
      absolute_total.quo(n_valid)
    end
    # Median of the absolute deviations from the median.
    def median_absolute_deviation
      center = median
      deviations = recode { |x| (x - center).abs }
      deviations.median
    end
    alias  :mad :median_absolute_deviation
    # Sample Variance (denominator n-1)
    def variance_sample(m=nil)
      check_type :scale
      m ||= mean
      degrees_of_freedom = n_valid - 1
      sum_of_squares(m).quo(degrees_of_freedom)
    end

    # Sample Standard deviation (denominator n-1)
    def standard_deviation_sample(m=nil)
        check_type :scale
        m ||= mean
        # Square root of the sample variance around +m+.
        variance = variance_sample(m)
        Math.sqrt(variance)
    end
    # Skewness of the sample
    def skew(m=nil)
        check_type :scale
        m ||= mean
        # Sum of cubed deviations divided by n * sd^3.
        cubed = 0
        @scale_data.each { |x| cubed += (x - m)**3 }
        cubed.quo(@scale_data.size * sd(m)**3)
    end
    # Kurtosis of the sample
    def kurtosis(m=nil)
        check_type :scale
        m ||= mean
        # Sum of fourth-power deviations divided by n * sd^4, minus 3.
        fourth = 0
        @scale_data.each { |x| fourth += (x - m)**4 }
        fourth.quo(@scale_data.size * sd(m)**4) - 3
    end
    # Product of all values on the sample
    #
    def product
        check_type :scale
        # Multiply left to right, starting from 1.
        result = 1
        @scale_data.each { |x| result = result * x }
        result
    end

    # With a fixnum, creates X bins within the range of data
    # With an Array, each value will be a cut point
    def histogram(bins=10)
      check_type :scale

      hist =
        if bins.is_a?(Array)
          # Explicit cut points.
          Statsample::Histogram.alloc(bins)
        else
          # Bins over a "nice" range. The upper limit of a bin has the form
          # x < limit, so when the nice maximum equals the data maximum it is
          # nudged upwards to keep the largest value inside the last bin.
          lower, upper = Statsample::Util.nice(@valid_data.min, @valid_data.max)
          upper += 1e-10 if upper == @valid_data.max
          Statsample::Histogram.alloc(bins, [lower, upper])
        end
      hist.increment(@valid_data)
      hist
    end

    # Coefficient of variation
    # Calculed with the sample standard deviation
    def coefficient_of_variation
        check_type :scale
        spread = standard_deviation_sample
        spread.quo(mean)
    end
    # Standard error of the distribution mean
    # Calculated using sd/sqrt(n)
    def standard_error
      # Sample standard deviation over the square root of the valid-case count.
      root_n = Math.sqrt(valid_data.size)
      standard_deviation_sample.quo(root_n)
    end
    alias :se :standard_error

    alias_method :sdp, :standard_deviation_population
    alias_method :sds, :standard_deviation_sample
    alias_method :adp, :average_deviation_population
    alias_method :cov, :coefficient_of_variation
    alias_method :variance, :variance_sample
    alias_method :sd, :standard_deviation_sample
    alias_method :ss, :sum_of_squares
    include_aliasing Statsample::Vector::GSL_ if Statsample.has_gsl?
  end
end


================================================
FILE: lib/statsample/version.rb
================================================
module Statsample
  # Version string of this Statsample release.
  VERSION = '1.4.0'
end


================================================
FILE: lib/statsample.rb
================================================
# = statsample.rb - 
# Statsample - Statistic package for Ruby
# Copyright (C) 2008-2014  Claudio Bustos
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#


#$:.unshift(File.dirname(__FILE__))
require 'matrix'
require 'extendmatrix'
require 'distribution'
require 'dirty-memoize'
require 'reportbuilder'


class Numeric
  # Returns the receiver multiplied by itself.
  def square
    self * self
  end
end

class String
  # Returns true when the whole string looks like a number: optional minus
  # sign, digits, optional "," or "." followed by digits, and an optional
  # exponent such as "e-3".
  #
  # The pattern is anchored with \A and \Z (string boundaries; \Z still allows
  # one trailing newline). The former ^ and $ are line anchors, so a multi-line
  # string containing one numeric line was accepted.
  def is_number?
    !!(self =~ /\A-?\d+[,.]?\d*(e-?\d+)?\Z/)
  end
end

class Module
  # Includes module +m+ while keeping the receiver's own implementations
  # reachable: every instance method of +m+ that the receiver already has is
  # first aliased to "<name>_<suffix>" and removed from the receiver, so the
  # version from +m+ is the one found after the include.
  def include_aliasing(m, suffix="ruby")
    m.instance_methods.each do |meth|
      next unless instance_methods.include?(meth)
      alias_method("#{meth}_#{suffix}", meth)
      remove_method(meth)
    end
    include m
  end
end

class Array
  # Makes repeated values unique by appending "_<occurrence number>" to each
  # occurrence; values that appear only once are left as they are.
  # Returns self when there is nothing repeated.
  # Example:
  #   a=%w{a b c c d d d e}
  #   a.recode_repeated
  #   => ["a","b","c_1","c_2","d_1","d_2","d_3","e"]
  def recode_repeated
    return self if size == uniq.size

    # How many times does each value occur?
    tally = each_with_object({}) { |item, acc| acc[item] = acc.fetch(item, 0) + 1 }
    repeated = tally.keys.select { |key| tally[key] > 1 }

    # Running occurrence counter for every repeated value.
    counters = {}
    repeated.each { |key| counters[key] = 0 }

    collect do |item|
      next item unless repeated.include?(item)
      counters[item] += 1
      sprintf("%s_%d", item, counters[item])
    end
  end
end

# Packs a test definition: the first argument is the description, the
# remaining arguments are the fields and the given block is the test body.
# Returns [description, fields, block]; the block is nil when none is given.
def create_test(*args,&proc)
  description=args.shift
  fields=args
  # Return the captured block directly: Proc.new without a block raises
  # ArgumentError on Ruby >= 3.0.
  [description, fields, proc]
end
#--
# Test extensions
# gettext is optional. When it cannot be loaded, the rescue branch defines
# pass-through stand-ins: bindtextdomain returns its argument and
# GetText#_ returns the untranslated text.
begin
  require 'gettext'
rescue LoadError
  def bindtextdomain(d) #:nodoc:
  d
  end
  
  # Bored module
  module GetText  #:nodoc:
    def _(t)  
        t
    end
  end
end
# Library for statistical analysis on Ruby
#
# * Classes for manipulation and storage of data:
# * Module Statsample::Bivariate provides covariance and pearson, spearman, point biserial, tau a, tau b, gamma, tetrachoric (see Bivariate::Tetrachoric) and polychoric (see Bivariate::Polychoric) correlations. Include methods to create correlation and covariance matrices
# * Multiple types of regression on Statsample::Regression
# * Factorial Analysis algorithms on Statsample::Factor module.
# * Dominance Analysis. Based on Budescu and Azen papers.link[http://psycnet.apa.org/journals/met/8/2/129/]. 
# * Module Statsample::Codification, to help to codify open questions
# * Converters to import and export data from databases, csv and excel files.
# * Module Statsample::Crosstab provides function to create crosstab for categorical data
# * Reliability analysis provides functions to analyze scales.
# * Module Statsample::SRS (Simple Random Sampling) provides a lot of functions to estimate standard error for several type of samples
# * Interfaces to gdchart, gnuplot and SVG::Graph 
#
module Statsample

  # Defines Statsample.has_<library>?, which tries to require +library+ the
  # first time it is called and caches the outcome in a class variable.
  def self.create_has_library(library)
    define_singleton_method("has_#{library}?") do
      cv="@@#{library}"
      if !class_variable_defined? cv
        begin
          require library.to_s
          class_variable_set(cv,true)
        rescue LoadError
          class_variable_set(cv,false)
        end
      end
      class_variable_get(cv)
    end
  end

  create_has_library :gsl

  # Default separator used when splitting multi-valued string data.
  SPLIT_TOKEN = ","
  autoload(:Analysis, 'statsample/analysis')
  autoload(:Database, 'statsample/converters')
  autoload(:Anova, 'statsample/anova')
  autoload(:CSV, 'statsample/converters')
  autoload(:PlainText, 'statsample/converters')
  autoload(:Excel, 'statsample/converters')
  autoload(:GGobi, 'statsample/converters')
  autoload(:SPSS, 'statsample/converter/spss')
  autoload(:Histogram, 'statsample/histogram')
  autoload(:DominanceAnalysis, 'statsample/dominanceanalysis')
  autoload(:HtmlReport, 'statsample/htmlreport')
  autoload(:Mx, 'statsample/converters')
  autoload(:Resample, 'statsample/resample')
  autoload(:SRS, 'statsample/srs')
  autoload(:Codification, 'statsample/codification')
  autoload(:Reliability, 'statsample/reliability')
  autoload(:Bivariate, 'statsample/bivariate')
  autoload(:Multivariate, 'statsample/multivariate')
  autoload(:Multiset, 'statsample/multiset')
  autoload(:StratifiedSample, 'statsample/multiset')
  autoload(:MLE, 'statsample/mle')
  autoload(:Regression, 'statsample/regression')
  autoload(:Test, 'statsample/test')
  autoload(:Factor, 'statsample/factor')
  autoload(:Graph, 'statsample/graph')


  class << self
    # Load an object saved on a file (see Writable#save).
    # Returns false when the file does not exist.
    #
    # SECURITY: Marshal.load can instantiate arbitrary objects; only use this
    # on files from a trusted source.
    def load(filename)
      return false unless File.exist? filename
      # Marshal data is binary: read it in binary mode so that no newline or
      # encoding translation is applied.
      File.open(filename,"rb") {|fp| Marshal.load(fp) }
    end


    # Create a matrix using vectors as columns.
    # Use:
    #
    #   matrix=Statsample.vector_cols_matrix(v1,v2)
    def vector_cols_matrix(*vs)
      # All arguments must be Statsample::Vector instances of the same size
      size=vs[0].size
      vs.each{|v|
        raise ArgumentError,"Arguments should be Vector" unless v.instance_of? Statsample::Vector
        raise ArgumentError,"Vectors size should be the same" if v.size!=size
      }
      Matrix.rows((0...size).to_a.collect() {|i|
        vs.collect{|v| v[i]}
      })
    end
    # Returns a duplicate of the input vectors, without missing data
    # for any of the vectors.
    #
    #  a=[1,2,3,6,7,nil,3,5].to_scale
    #  b=[nil,nil,5,6,4,5,10,2].to_scale
    #  c=[2,4,6,7,4,5,6,7].to_scale
    #  a2,b2,c2=Statsample.only_valid(a,b,c)
    #  => [#<Statsample::Scale:0xb748c8c8 @data=[3, 6, 7, 3, 5]>,
    #        #<Statsample::Scale:0xb748c814 @data=[5, 6, 4, 10, 2]>,
    #        #<Statsample::Scale:0xb748c760 @data=[6, 7, 4, 6, 7]>]
    #
    def only_valid(*vs)
      # Name the vectors v1, v2, ... and let the Dataset drop incomplete cases
      i=1
      h=vs.inject({}) {|a,v| a["v#{i}"]=v;i+=1;a}
      ds=Statsample::Dataset.new(h).dup_only_valid
      ds.vectors.values
    end

    # Cheap version of #only_valid.
    # If any vector has missing values, return only the valid cases.
    # If not, return the vectors themselves
    def only_valid_clone(*vs)
      if vs.any? {|v| v.flawed?}
        only_valid(*vs)
      else
        vs
      end
    end
  end


  module Util
    # Reference: http://www.itl.nist.gov/div898/handbook/eda/section3/normprpl.htm
    def normal_order_statistic_medians(i,n)
      if i==1
        u= 1.0 - normal_order_statistic_medians(n,n)
      elsif i==n
        u=0.5**(1 / n.to_f)
      else
        u= (i - 0.3175) / (n + 0.365)
      end
      u
    end

    # Widens the interval [s, e] outwards to multiples of a power-of-ten step
    # derived from its span. The input is returned unchanged when the span is
    # zero or infinite; a reversed interval stays reversed.
    def self.nice(s,e) # :nodoc:
      reverse = e<s
      min = reverse ? e : s
      max = reverse ? s : e
      span=max-min
      return [s, e] if (span == 0 or (span.respond_to? :infinite? and span.infinite?))

      step=10**((Math::log(span).quo(Math::log(10))).round - 1).to_f
      out=[(min.quo(step)).floor * step, (max.quo(step)).ceil * step]
      out.reverse! if reverse
      out
    end


  end


  module Writable
    # Serializes the receiver with Marshal into +filename+.
    def save(filename)
      # Binary mode because Marshal data is binary; the block form closes the
      # file even when the dump raises.
      File.open(filename,"wb") {|fp| Marshal.dump(self,fp) }
      nil
    end
  end
  # Provides method summary to generate summaries and include GetText
  module Summarizable
    include GetText
    bindtextdomain("statsample")
    # Builds a report for the receiver and renders it with +method+
    # (:to_text by default).
    def summary(method=:to_text)
      ReportBuilder.new(:no_title=>true).add(self).send(method)
    end
  end
  module STATSAMPLE__ #:nodoc:
  end
end


#--
# Try to load 'statsamplert'; when it is unavailable, record that by
# setting Statsample::OPTIMIZED to false.
# NOTE(review): presumably 'statsamplert' is the optional compiled extension — confirm.
begin 
  require 'statsamplert'
rescue LoadError
  module Statsample
    OPTIMIZED=false
  end
end

require 'statsample/vector'
require 'statsample/dataset'
require 'statsample/crosstab'
require 'statsample/matrix'
require 'statsample/shorthand'
require 'statsample/version'


================================================
FILE: po/es/statsample.po
================================================
msgid ""
msgstr ""
"Project-Id-Version: statsample 1.0.1\n"
"POT-Creation-Date: 2011-03-03 12:03-0300\n"
"PO-Revision-Date: 2011-03-03 12:05-0300\n"
"Last-Translator: Claudio Bustos <clbustos@gmail.com>\n"
"Language-Team: Desarrollador\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Poedit-Language: Spanish\n"
"X-Poedit-SourceCharset: utf-8\n"

#: lib/statsample/test/f.rb:26
msgid "F Test"
msgstr "Prueba F"

#: lib/statsample/test/t.rb:82
msgid "T Test"
msgstr "Prueba T"

#: lib/statsample/test/t.rb:83
msgid "Estimate"
msgstr "Estimado"

#: lib/statsample/test/t.rb:84
msgid "Std.Err.of Estimate"
msgstr "Err.Est. del Estimado"

#: lib/statsample/test/t.rb:114
msgid "%s: %0.4f | %s: %0.4f"
msgstr "%s: %0.4f | %s: %0.4f"

#: lib/statsample/test/t.rb:120
msgid "t(%d) = %0.4f, p=%0.4f (%s tails)"
msgstr "t(%d) = %0.4f, p=%0.4f (%s colas)"

#: lib/statsample/test/t.rb:121
msgid "CI(%d%%): %0.4f - %0.4f"
msgstr "IC(%d%%): %0.4f - %0.4f"

#: lib/statsample/test/t.rb:190
msgid "Sample mean: %0.4f | Sample sd: %0.4f | se : %0.4f"
msgstr "Media de la muestra: %0.4f | DE de la muestra: %0.4f | EE : %0.4f"

#: lib/statsample/test/t.rb:191
msgid "Population mean: %0.4f"
msgstr "Promedio población: %0.4f"

#: lib/statsample/test/t.rb:292
msgid "Mean and standard deviation"
msgstr "Promedio y desviación estándar"

#: lib/statsample/test/t.rb:292
#: lib/statsample/regression/simple.rb:109
#: lib/statsample/factor/pca.rb:216
#: lib/statsample/factor/principalaxis.rb:202
msgid "Variable"
msgstr "Variable"

#: lib/statsample/test/t.rb:292
#: lib/statsample/dominanceanalysis/bootstrap.rb:208
msgid "mean"
msgstr "promedio"

#: lib/statsample/test/t.rb:292
msgid "sd"
msgstr "de"

#: lib/statsample/test/t.rb:292
#: lib/statsample/factor/parallelanalysis.rb:103
#: lib/statsample/factor/parallelanalysis.rb:111
msgid "n"
msgstr "n"

#: lib/statsample/test/t.rb:296
msgid "Levene test for equality of variances"
msgstr "Test de Levene para igualdad de varianzas"

#: lib/statsample/test/t.rb:298
msgid "T statistics"
msgstr "Estadístico T"

#: lib/statsample/test/t.rb:299
msgid "Equal variance"
msgstr "Varianza Igual"

#: lib/statsample/test/t.rb:300
msgid "Non equal variance"
msgstr "Varianza Desigual"

#: lib/statsample/test/t.rb:302
msgid "Effect size"
msgstr "Tamaño del efecto"

#: lib/statsample/test/umannwhitney.rb:140
msgid "Mann-Whitney's U"
msgstr "U de Mann-Whitney"

#: lib/statsample/test/umannwhitney.rb:149
msgid "%s results"
msgstr "resultados de %s"

#: lib/statsample/test/umannwhitney.rb:150
#: lib/statsample/test/umannwhitney.rb:151
msgid "Sum of ranks %s"
msgstr "Suma de rangos %s"

#: lib/statsample/test/umannwhitney.rb:152
msgid "U Value"
msgstr "Valor de U"

#: lib/statsample/test/umannwhitney.rb:153
msgid "Z"
msgstr "Z"

#: lib/statsample/test/umannwhitney.rb:155
msgid "Exact p (Dinneen & Blakesley, 1973):"
msgstr "p exacto (Dinneen & Blakesley, 1973):"

#: lib/statsample/test/levene.rb:37
msgid "Levene Test"
msgstr "Test de Levene"

#: lib/statsample/test/bartlettsphericity.rb:25
msgid "Bartlett's test of sphericity"
msgstr "Test de esfericidad de Bartlett"

#: lib/statsample/regression/multiple/baseengine.rb:27
msgid "Multiple Regression:  %s over %s"
msgstr "Regresión Múltiple: %s sobre %s"

#: lib/statsample/regression/multiple/baseengine.rb:40
msgid "Regression"
msgstr "Regresión"

#: lib/statsample/regression/multiple/baseengine.rb:40
msgid "Error"
msgstr "Error"

#: lib/statsample/regression/multiple/baseengine.rb:184
msgid "Engine: %s"
msgstr "Motor: %s"

#: lib/statsample/regression/multiple/baseengine.rb:185
msgid "Cases(listwise)=%d(%d)"
msgstr "Casos (sólo válidos)=%d(%d)"

#: lib/statsample/regression/multiple/baseengine.rb:186
msgid "R="
msgstr "R="

#: lib/statsample/regression/multiple/baseengine.rb:187
msgid "R^2="
msgstr "R^2="

#: lib/statsample/regression/multiple/baseengine.rb:188
msgid "R^2 Adj="
msgstr "R^2 Adj="

#: lib/statsample/regression/multiple/baseengine.rb:189
msgid "Std.Error R="
msgstr "Error estándar R="

#: lib/statsample/regression/multiple/baseengine.rb:191
msgid "Equation"
msgstr "Ecuación"

#: lib/statsample/regression/multiple/baseengine.rb:197
msgid "Beta coefficients"
msgstr "Coeficientes beta"

#: lib/statsample/regression/multiple/baseengine.rb:198
msgid "Constant"
msgstr "Constante"

#: lib/statsample/regression/multiple/matrixengine.rb:78
msgid "Multiple reggresion of %s on %s"
msgstr "Regresión Múltiple de %s en %s"

#: lib/statsample/regression/simple.rb:88
msgid "Regression of %s over %s"
msgstr "Regresión de %s sobre %s"

#: lib/statsample/regression/simple.rb:109
#: lib/statsample/factor/map.rb:105
#: lib/statsample/reliability/skillscaleanalysis.rb:92
msgid "Value"
msgstr "Valor"

#: lib/statsample/regression/simple.rb:110
msgid "r"
msgstr "r"

#: lib/statsample/regression/simple.rb:111
msgid "r^2"
msgstr "r^2"

#: lib/statsample/regression/simple.rb:112
msgid "a"
msgstr "a"

#: lib/statsample/regression/simple.rb:113
msgid "b"
msgstr "b"

#: lib/statsample/regression/simple.rb:114
msgid "s.e"
msgstr "e.e."

#: lib/statsample/dominanceanalysis/bootstrap.rb:115
msgid "Bootstrap dominance Analysis:  %s over %s"
msgstr "Análisis de Dominancia Bootstrap: %s en %s"

#: lib/statsample/dominanceanalysis/bootstrap.rb:138
msgid "Bootstrap %d of %d"
msgstr "Bootstrap: %d de %d"

#: lib/statsample/dominanceanalysis/bootstrap.rb:177
msgid "Sample size: %d\n"
msgstr "Tamaño de muestra: %d\n"

#: lib/statsample/dominanceanalysis/bootstrap.rb:179
msgid "Linear Regression Engine: %s"
msgstr "Motor de Regresión Lineal: %s"

#: lib/statsample/dominanceanalysis/bootstrap.rb:181
msgid "pairs"
msgstr "pares"

#: lib/statsample/dominanceanalysis/bootstrap.rb:181
msgid "SE(Dij)"
msgstr "EE(Dij)"

#: lib/statsample/dominanceanalysis/bootstrap.rb:181
msgid "Reproducibility"
msgstr "Reproducibilidad"

#: lib/statsample/dominanceanalysis/bootstrap.rb:182
msgid "Complete dominance"
msgstr "Dominancia Completa"

#: lib/statsample/dominanceanalysis/bootstrap.rb:190
msgid "Conditional dominance"
msgstr "Dominancia Condicional"

#: lib/statsample/dominanceanalysis/bootstrap.rb:199
msgid "General Dominance"
msgstr "Dominancia General"

#: lib/statsample/dominanceanalysis/bootstrap.rb:208
msgid "General averages"
msgstr "Promedios generales"

#: lib/statsample/dominanceanalysis/bootstrap.rb:208
msgid "var"
msgstr "var"

#: lib/statsample/dominanceanalysis/bootstrap.rb:208
msgid "se"
msgstr "ee"

#: lib/statsample/dominanceanalysis/bootstrap.rb:208
msgid "p.5"
msgstr "p.5"

#: lib/statsample/dominanceanalysis/bootstrap.rb:208
msgid "p.95"
msgstr "p.95"

#: lib/statsample/anova/twoway.rb:59
msgid "ANOVA Two-Way"
msgstr "Anova de dos vías"

#: lib/statsample/anova/twoway.rb:60
msgid "A"
msgstr "A"

#: lib/statsample/anova/twoway.rb:61
msgid "B"
msgstr "B"

#: lib/statsample/anova/twoway.rb:62
msgid "Within"
msgstr "Dentro"

#: lib/statsample/anova/twoway.rb:98
#: lib/statsample/anova/oneway.rb:57
msgid "%s Table"
msgstr "Tabla %s"

#: lib/statsample/anova/twoway.rb:103
#: lib/statsample/anova/oneway.rb:60
#: lib/statsample/crosstab.rb:101
#: lib/statsample/crosstab.rb:116
#: lib/statsample/crosstab.rb:151
#: lib/statsample/crosstab.rb:173
#: lib/statsample/dominanceanalysis.rb:354
msgid "Total"
msgstr "Total"

#: lib/statsample/anova/twoway.rb:172
msgid "Anova Two-Way on %s"
msgstr "Anova de dos vías en %s"

#: lib/statsample/anova/twoway.rb:184
#: lib/statsample/anova/oneway.rb:127
msgid "Test of Homogeneity of variances (Levene)"
msgstr "Test de homogeneidad de varianza (Levene)"

#: lib/statsample/anova/twoway.rb:189
#: lib/statsample/anova/twoway.rb:193
msgid "%s Mean"
msgstr "Promedio %s"

#: lib/statsample/anova/oneway.rb:35
msgid "Explained variance"
msgstr "Varianza explicada"

#: lib/statsample/anova/oneway.rb:36
msgid "Unexplained variance"
msgstr "Varianza sin explicar"

#: lib/statsample/anova/oneway.rb:97
msgid "Anova One-Way"
msgstr "Anova de una vía"

#: lib/statsample/anova/oneway.rb:98
msgid "Between Groups"
msgstr "Entre grupos"

#: lib/statsample/anova/oneway.rb:99
msgid "Within Groups"
msgstr "Dentro de grupos"

#: lib/statsample/anova/oneway.rb:119
msgid "Contrast for %s"
msgstr "Contraste para %s"

#: lib/statsample/anova/oneway.rb:163
msgid "Descriptives"
msgstr "Descriptivos"

#: lib/statsample/anova/contrast.rb:13
msgid "Psi estimate"
msgstr "Psi Estimado"

#: lib/statsample/anova/contrast.rb:14
msgid "Contrast"
msgstr "Contraste"

#: lib/statsample/anova/contrast.rb:73
msgid "Contrast:%s"
msgstr "Contraste: %s"

#: lib/statsample/graph/scatterplot.rb:72
msgid "Scatterplot (%s - %s)"
msgstr "Diagrama de dispersión (%s - %s)"

#: lib/statsample/graph/histogram.rb:50
msgid "Histograma (%s)"
msgstr "Histograma (%s)"

#: lib/statsample/graph/boxplot.rb:63
msgid "Boxplot"
msgstr "Diagrama de caja"

#: lib/statsample/bivariate/pearson.rb:32
msgid "Correlation (%s - %s)"
msgstr "Correlación (%s - %s)"

#: lib/statsample/bivariate/pearson.rb:50
msgid "%s : r=%0.3f (t:%0.3f, g.l.=%d, p:%0.3f / %s tails)"
msgstr "%s : r=%0.3f (t:%0.3f, g.l.=%d, p:%0.3f / %s colas)"

#: lib/statsample/factor/parallelanalysis.rb:68
msgid "Parallel Analysis"
msgstr "Análisis Paralelo"

#: lib/statsample/factor/parallelanalysis.rb:96
msgid "Bootstrap Method: %s"
msgstr "Método de Remuestreo: %s"

#: lib/statsample/factor/parallelanalysis.rb:97
msgid "Uses SMC: %s"
msgstr "Usa SMC: %s"

#: lib/statsample/factor/parallelanalysis.rb:97
msgid "Yes"
msgstr "Sí"

#: lib/statsample/factor/parallelanalysis.rb:97
msgid "No"
msgstr "No"

#: lib/statsample/factor/parallelanalysis.rb:98
msgid "Correlation Matrix type : %s"
msgstr "Tipo de matriz de correlación : %s"

#: lib/statsample/factor/parallelanalysis.rb:99
msgid "Number of variables: %d"
msgstr "Número de variables: %d"

#: lib/statsample/factor/parallelanalysis.rb:100
msgid "Number of cases: %d"
msgstr "Número de casos: %d"

#: lib/statsample/factor/parallelanalysis.rb:101
msgid "Number of iterations: %d"
msgstr "Número de iteraciones: %d"

#: lib/statsample/factor/parallelanalysis.rb:103
#: lib/statsample/factor/parallelanalysis.rb:111
#: lib/statsample/factor/map.rb:105
msgid "Eigenvalues"
msgstr "Eigenvalues"

#: lib/statsample/factor/parallelanalysis.rb:103
#: lib/statsample/factor/parallelanalysis.rb:111
msgid "generated eigenvalue"
msgstr "eigenvalue generado"

#: lib/statsample/factor/parallelanalysis.rb:110
msgid "Number or factors to preserve: %d"
msgstr "Número de factores a preservar: %d"

#: lib/statsample/factor/parallelanalysis.rb:111
msgid "data eigenvalue"
msgstr "eigenvalue de los datos"

#: lib/statsample/factor/parallelanalysis.rb:111
msgid "preserve?"
msgstr "¿preservar?"

#: lib/statsample/factor/map.rb:60
msgid "Velicer's MAP"
msgstr "PPM de Velicer"

#: lib/statsample/factor/map.rb:110
msgid "Velicer's Average Squared Correlations"
msgstr "Correlaciones Cuadradas Promedio de Velicer"

#: lib/statsample/factor/map.rb:110
msgid "number of components"
msgstr "número de componentes"

#: lib/statsample/factor/map.rb:110
msgid "average square correlation"
msgstr "correlación cuadrada promedio"

#: lib/statsample/factor/map.rb:115
msgid "The smallest average squared correlation is : %0.6f"
msgstr "La correlación cuadrada promedio más pequeña es: %0.6f"

#: lib/statsample/factor/map.rb:116
msgid "The number of components is : %d"
msgstr "El número de componentes es: %d"

#: lib/statsample/factor/pca.rb:56
msgid "Principal Component Analysis"
msgstr "Análisis de componentes principales"

#: lib/statsample/factor/pca.rb:59
#: lib/statsample/matrix.rb:14
#: lib/statsample/matrix.rb:81
msgid "VAR_%d"
msgstr "VAR_%d"

#: lib/statsample/factor/pca.rb:160
msgid "Component matrix (from covariance)"
msgstr "Matriz de componentes (desde covarianza)"

#: lib/statsample/factor/pca.rb:181
msgid "Component matrix"
msgstr "Matriz de componentes"

#: lib/statsample/factor/pca.rb:215
#: lib/statsample/factor/principalaxis.rb:200
msgid "Number of factors: %d"
msgstr "Número de factores: %d"

#: lib/statsample/factor/pca.rb:216
#: lib/statsample/factor/principalaxis.rb:202
msgid "Communalities"
msgstr "Comunalidades"

#: lib/statsample/factor/pca.rb:216
#: lib/statsample/factor/principalaxis.rb:202
msgid "Initial"
msgstr "Inicial"

#: lib/statsample/factor/pca.rb:216
#: lib/statsample/factor/principalaxis.rb:202
msgid "Extraction"
msgstr "Extracción"

#: lib/statsample/factor/pca.rb:216
#: lib/statsample/factor/pca.rb:223
#: lib/statsample/reliability/skillscaleanalysis.rb:92
msgid "%"
msgstr "%"

#: lib/statsample/factor/pca.rb:223
msgid "Total Variance Explained"
msgstr "Varianza Total Explicada"

#: lib/statsample/factor/pca.rb:223
msgid "Component"
msgstr "Componente"

#: lib/statsample/factor/pca.rb:223
msgid "E.Total"
msgstr "E. Total"

#: lib/statsample/factor/pca.rb:223
msgid "Cum. %"
msgstr "% Acum."

#: lib/statsample/factor/pca.rb:227
msgid "Component %d"
msgstr "Componente %d"

#: lib/statsample/factor/principalaxis.rb:70
msgid "Variable %d"
msgstr "Variable %d"

#: lib/statsample/factor/principalaxis.rb:147
msgid "Factor Matrix"
msgstr "Matriz de Factores"

#: lib/statsample/factor/principalaxis.rb:201
msgid "Iterations: %d"
msgstr "Iteraciones: %d"

#: lib/statsample/factor/principalaxis.rb:207
msgid "Total Variance"
msgstr "Varianza Total"

#: lib/statsample/factor/principalaxis.rb:207
msgid "Factor"
msgstr "Factor"

#: lib/statsample/factor/principalaxis.rb:207
msgid "I.E.Total"
msgstr "E.I. Total"

#: lib/statsample/factor/principalaxis.rb:207
msgid "I.E. %"
msgstr "E.I. %"

#: lib/statsample/factor/principalaxis.rb:207
msgid "I.E.Cum. %"
msgstr "E.I. Acum. %"

#: lib/statsample/factor/principalaxis.rb:208
msgid "S.L.Total"
msgstr "C.C. Total"

#: lib/statsample/factor/principalaxis.rb:208
msgid "S.L. %"
msgstr "C.C. %"

#: lib/statsample/factor/principalaxis.rb:208
msgid "S.L.Cum. %"
msgstr "C.C. Acum %"

#: lib/statsample/factor/principalaxis.rb:215
msgid "Factor %d"
msgstr "Factor %d"

#: lib/statsample/factor/rotation.rb:35
msgid "%s rotation"
msgstr "rotación %s"

#: lib/statsample/factor/rotation.rb:132
msgid "Rotated Component matrix"
msgstr "Matriz de componentes rotada"

#: lib/statsample/factor/rotation.rb:149
msgid "Component transformation matrix"
msgstr "Matriz de transformación de componentes"

#: lib/statsample/reliability/multiscaleanalysis.rb:67
msgid "Multiple Scale analysis"
msgstr "Análisis de múltiples escalas"

#: lib/statsample/reliability/multiscaleanalysis.rb:97
msgid "Scale %s"
msgstr "Escala %s"

#: lib/statsample/reliability/multiscaleanalysis.rb:145
msgid "Reliability analysis of scales"
msgstr "Análisis de confiabilidad de escalas"

#: lib/statsample/reliability/multiscaleanalysis.rb:151
msgid "Correlation matrix for %s"
msgstr "Matriz de correlaciones para %s"

#: lib/statsample/reliability/multiscaleanalysis.rb:156
msgid "PCA for %s"
msgstr "ACP para %s"

#: lib/statsample/reliability/multiscaleanalysis.rb:161
msgid "Principal Axis for %s"
msgstr "Ejes principales para %s"

#: lib/statsample/reliability/multiscaleanalysis.rb:167
msgid "Parallel Analysis for %s"
msgstr "Análisis Paralelo para %s"

#: lib/statsample/reliability/multiscaleanalysis.rb:172
msgid "MAP for %s"
msgstr "MAP para %s"

#: lib/statsample/reliability/skillscaleanalysis.rb:21
msgid "Skill Scale Reliability Analysis (%s)"
msgstr "Análisis de confiabilidad de escalas de habilidad (%s)"

#: lib/statsample/reliability/skillscaleanalysis.rb:36
msgid "%s(corrected)"
msgstr "%s(corregido)"

#: lib/statsample/reliability/skillscaleanalysis.rb:40
msgid "Corrected dataset from %s"
msgstr "Grupo de datos corregido desde %s"

#: lib/statsample/reliability/skillscaleanalysis.rb:51
msgid "%s (Scale Analysis)"
msgstr "%s (Análisis de Escala)"

#: lib/statsample/reliability/skillscaleanalysis.rb:82
msgid "Problematic Items"
msgstr "Ítems problemáticos"

#: lib/statsample/reliability/skillscaleanalysis.rb:87
msgid "Item: %s"
msgstr "Ítem: %s"

#: lib/statsample/reliability/skillscaleanalysis.rb:88
msgid "Correct answer: %s"
msgstr "Respuesta correcta: %s"

#: lib/statsample/reliability/skillscaleanalysis.rb:89
msgid "p: %0.3f"
msgstr "p: %0.3f"

#: lib/statsample/reliability/skillscaleanalysis.rb:101
msgid "No problematic items"
msgstr "Sin ítems problemáticos"

#: lib/statsample/reliability/scaleanalysis.rb:44
msgid "Reliability Analisis"
msgstr "Análisis de confiabilidad"

#: lib/statsample/reliability/scaleanalysis.rb:157
msgid "Summary for %s with all items"
msgstr "Sumario para %s con todos los ítems"

#: lib/statsample/reliability/scaleanalysis.rb:158
msgid "Items"
msgstr "Ítems"

#: lib/statsample/reliability/scaleanalysis.rb:159
#: lib/statsample/reliability/scaleanalysis.rb:176
msgid "Sum mean"
msgstr "Promedio de suma"

#: lib/statsample/reliability/scaleanalysis.rb:160
msgid "S.d. mean"
msgstr "Promedio de d.e."

#: lib/statsample/reliability/scaleanalysis.rb:162
msgid "Deleted items"
msgstr "Ítems eliminados"

#: lib/statsample/reliability/scaleanalysis.rb:172
msgid "Summary for %s"
msgstr "Sumario para %s"

#: lib/statsample/reliability/scaleanalysis.rb:173
msgid "Valid Items"
msgstr "Ítems Válidos"

#: lib/statsample/reliability/scaleanalysis.rb:175
msgid "Valid cases"
msgstr "casos válidos"

#: lib/statsample/reliability/scaleanalysis.rb:177
msgid "Sum sd"
msgstr "d.e. de suma"

#: lib/statsample/reliability/scaleanalysis.rb:179
msgid "Sum median"
msgstr "Mediana de suma"

#: lib/statsample/reliability/scaleanalysis.rb:181
msgid "Item mean"
msgstr "Promedio de los ítems"

#: lib/statsample/reliability/scaleanalysis.rb:182
msgid "Item sd"
msgstr "d.e. de ítems"

#: lib/statsample/reliability/scaleanalysis.rb:184
msgid "Skewness"
msgstr "Sesgo"

#: lib/statsample/reliability/scaleanalysis.rb:185
msgid "Kurtosis"
msgstr "Curtosis"

#: lib/statsample/reliability/scaleanalysis.rb:187
msgid "Cronbach's alpha"
msgstr "Alfa de Cronbach"

#: lib/statsample/reliability/scaleanalysis.rb:188
msgid "Standarized Cronbach's alpha"
msgstr "Alfa de Cronbach estandarizado"

#: lib/statsample/reliability/scaleanalysis.rb:189
msgid "Mean rpb"
msgstr "rpb medio"

#: lib/statsample/reliability/scaleanalysis.rb:191
msgid "Variances mean"
msgstr "Promedio de las varianzas"

#: lib/statsample/reliability/scaleanalysis.rb:192
msgid "Covariances mean"
msgstr "Promedio de las covarianzas"

#: lib/statsample/reliability/scaleanalysis.rb:196
msgid "Items for obtain alpha(0.8) : %d"
msgstr "Ítems para obtener alfa(0,8): %d"

#: lib/statsample/reliability/scaleanalysis.rb:197
msgid "Items for obtain alpha(0.9) : %d"
msgstr "Ítems para obtener alfa(0,9): %d"

#: lib/statsample/reliability/scaleanalysis.rb:205
msgid "Items report for %s"
msgstr "Reporte de ítems para %s"

#: lib/statsample/reliability/icc.rb:114
msgid "Shrout & Fleiss ICC(1,1)"
msgstr "Shrout & Fleiss ICC(1,1)"

#: lib/statsample/reliability/icc.rb:119
msgid "Shrout & Fleiss ICC(2,1)"
msgstr "Shrout & Fleiss ICC(2,1)"

#: lib/statsample/reliability/icc.rb:125
msgid "Shrout & Fleiss ICC(3,1)"
msgstr "Shrout & Fleiss ICC(3,1)"

#: lib/statsample/reliability/icc.rb:132
msgid "Shrout & Fleiss ICC(1,k)"
msgstr "Shrout & Fleiss ICC(1,k)"

#: lib/statsample/reliability/icc.rb:138
msgid "Shrout & Fleiss ICC(2,k)"
msgstr "Shrout & Fleiss ICC(2,k)"

#: lib/statsample/reliability/icc.rb:145
msgid "Shrout & Fleiss ICC(3,k)"
msgstr "Shrout & Fleiss ICC(3,k)"

#: lib/statsample/reliability/icc.rb:153
msgid "McGraw & Wong ICC(1)"
msgstr "McGraw & Wong ICC(1)"

#: lib/statsample/reliability/icc.rb:159
msgid "McGraw & Wong ICC(K)"
msgstr "McGraw & Wong ICC(K)"

#: lib/statsample/reliability/icc.rb:165
msgid "McGraw & Wong ICC(C,1)"
msgstr "McGraw & Wong ICC(C,1)"

#: lib/statsample/reliability/icc.rb:172
msgid "McGraw & Wong ICC(C,K)"
msgstr "McGraw & Wong ICC(C,K)"

#: lib/statsample/reliability/icc.rb:179
msgid "McGraw & Wong ICC(A,1)"
msgstr "McGraw & Wong ICC(A,1)"

#: lib/statsample/reliability/icc.rb:186
msgid "McGraw & Wong ICC(A,K)"
msgstr "McGraw & Wong ICC(A,K)"

#: lib/statsample/reliability/icc.rb:408
msgid "ICC: %0.4f"
msgstr "CIC: %0.4f"

#: lib/statsample/reliability/icc.rb:410
msgid "CI (%0.2f): [%0.4f - %0.4f]"
msgstr "IC (%0.2f): [%0.4f - %0.4f]"

#: lib/statsample/crosstab.rb:22
msgid "Crosstab %s - %s"
msgstr "Tabulación cruzada %s - %s"

#: lib/statsample/crosstab.rb:98
msgid "Rows: %s"
msgstr "Filas: %s"

#: lib/statsample/crosstab.rb:99
msgid "Columns: %s"
msgstr "Columnas: %s"

#: lib/statsample/crosstab.rb:101
msgid "Raw"
msgstr "En Bruto"

#: lib/statsample/crosstab.rb:146
msgid "% Row"
msgstr "% Fila"

#: lib/statsample/crosstab.rb:147
msgid "% Column"
msgstr "% Columna"

#: lib/statsample/crosstab.rb:148
msgid "% Total"
msgstr "% Total"

#: lib/statsample/dominanceanalysis.rb:121
msgid "Dominance Analysis:  %s over %s"
msgstr "Análisis de dominancia: %s en %s"

#: lib/statsample/dominanceanalysis.rb:315
msgid "sign"
msgstr "signo"

#: lib/statsample/dominanceanalysis.rb:317
msgid "Dominance Analysis result"
msgstr "Resultados del análisis de dominancia"

#: lib/statsample/dominanceanalysis.rb:318
msgid "Model 0"
msgstr "Modelo 0"

#: lib/statsample/dominanceanalysis.rb:333
msgid "k=%d Average"
msgstr "k=%d Promedio"

#: lib/statsample/dominanceanalysis.rb:345
msgid "Overall averages"
msgstr "Promedios generales"

#: lib/statsample/dominanceanalysis.rb:354
msgid "Pairwise dominance"
msgstr "Dominancia en pares"

#: lib/statsample/dominanceanalysis.rb:354
msgid "Pairs"
msgstr "Pares"

#: lib/statsample/dominanceanalysis.rb:354
msgid "Conditional"
msgstr "Condicional"

#: lib/statsample/dominanceanalysis.rb:354
msgid "General"
msgstr "General"

#: lib/statsample/matrix.rb:181
msgid "X%d"
msgstr "X%d"

#: lib/statsample/matrix.rb:184
msgid "Y%d"
msgstr "Y%d"

#: lib/statsample/matrix.rb:196
msgid "Matrix %d"
msgstr "Matriz %d"

#: lib/statsample/matrix.rb:255
msgid "Covariate matrix %d"
msgstr "Matriz de Covarianza %d"

#: lib/statsample/matrix.rb:303
msgid "Correlation"
msgstr "Correlación"

#: lib/statsample/matrix.rb:303
msgid "Covariance"
msgstr "Covarianza"

#: lib/statsample/matrix.rb:303
msgid " Matrix"
msgstr " Matriz"

#: lib/statsample/vector.rb:177
msgid "%s(standarized)"
msgstr "%s(estandarizado)"

#: lib/statsample/vector.rb:189
msgid "%s(centered)"
msgstr "%s(centrado)"

#: lib/statsample/vector.rb:201
msgid "%s(percentil)"
msgstr "%s(percentil)"

#: lib/statsample/vector.rb:778
msgid "n :%d"
msgstr "n: %d"

#: lib/statsample/vector.rb:779
msgid "n valid:%d"
msgstr "n válido: %d"

#: lib/statsample/vector.rb:780
msgid "factors:%s"
msgstr "factores:%s"

#: lib/statsample/vector.rb:781
msgid "mode: %s"
msgstr "modo: %s"

#: lib/statsample/vector.rb:782
msgid "Distribution"
msgstr "Distribución"

#: lib/statsample/vector.rb:788
msgid "median: %s"
msgstr "Mediana: %s"

#: lib/statsample/vector.rb:790
msgid "mean: %0.4f"
msgstr "promedio: %0.4f"

#: lib/statsample/vector.rb:791
msgid "sd: %0.4f"
msgstr "d.e.: %0.4f"

#: lib/statsample/dataset.rb:161
msgid "Dataset %d"
msgstr "Dataset %d"

#: lib/statsample/dataset.rb:457
msgid "Sum from %s"
msgstr "Suma para %s"

#: lib/statsample/dataset.rb:510
msgid "Means from %s"
msgstr "Media desde %s"

#: lib/statsample/dataset.rb:734
msgid "%s(filtered)"
msgstr "%s(filtrado)"

#: lib/statsample/dataset.rb:956
msgid "Cases: %d"
msgstr "Casos: %d"


================================================
FILE: po/statsample.pot
================================================
# Statsample po template.
# Copyright (C) 2009-2009 Claudio Bustos
# This file is distributed under the same license as the Statsample package.
# Claudio Bustos <clbustos_AT_gmail.com>
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: statsample 1.0.1\n"
"POT-Creation-Date: 2011-03-03 12:03-0300\n"
"PO-Revision-Date: 2009-08-04 15:36-0400\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n"

#: lib/statsample/test/f.rb:26
msgid "F Test"
msgstr ""

#: lib/statsample/test/t.rb:82
msgid "T Test"
msgstr ""

#: lib/statsample/test/t.rb:83
msgid "Estimate"
msgstr ""

#: lib/statsample/test/t.rb:84
msgid "Std.Err.of Estimate"
msgstr ""

#: lib/statsample/test/t.rb:114
msgid "%s: %0.4f | %s: %0.4f"
msgstr ""

#: lib/statsample/test/t.rb:120
msgid "t(%d) = %0.4f, p=%0.4f (%s tails)"
msgstr ""

#: lib/statsample/test/t.rb:121
msgid "CI(%d%%): %0.4f - %0.4f"
msgstr ""

#: lib/statsample/test/t.rb:190
msgid "Sample mean: %0.4f | Sample sd: %0.4f | se : %0.4f"
msgstr ""

#: lib/statsample/test/t.rb:191
msgid "Population mean: %0.4f"
msgstr ""

#: lib/statsample/test/t.rb:292
msgid "Mean and standard deviation"
msgstr ""

#: lib/statsample/test/t.rb:292 lib/statsample/regression/simple.rb:109
#: lib/statsample/factor/pca.rb:216 lib/statsample/factor/principalaxis.rb:202
msgid "Variable"
msgstr ""

#: lib/statsample/test/t.rb:292
#: lib/statsample/dominanceanalysis/bootstrap.rb:208
msgid "mean"
msgstr ""

#: lib/statsample/test/t.rb:292
msgid "sd"
msgstr ""

#: lib/statsample/test/t.rb:292 lib/statsample/factor/parallelanalysis.rb:103
#: lib/statsample/factor/parallelanalysis.rb:111
msgid "n"
msgstr ""

#: lib/statsample/test/t.rb:296
msgid "Levene test for equality of variances"
msgstr ""

#: lib/statsample/test/t.rb:298
msgid "T statistics"
msgstr ""

#: lib/statsample/test/t.rb:299
msgid "Equal variance"
msgstr ""

#: lib/statsample/test/t.rb:300
msgid "Non equal variance"
msgstr ""

#: lib/statsample/test/t.rb:302
msgid "Effect size"
msgstr ""

#: lib/statsample/test/umannwhitney.rb:140
msgid "Mann-Whitney's U"
msgstr ""

#: lib/statsample/test/umannwhitney.rb:149
msgid "%s results"
msgstr ""

#: lib/statsample/test/umannwhitney.rb:150
#: lib/statsample/test/umannwhitney.rb:151
msgid "Sum of ranks %s"
msgstr ""

#: lib/statsample/test/umannwhitney.rb:152
msgid "U Value"
msgstr ""

#: lib/statsample/test/umannwhitney.rb:153
msgid "Z"
msgstr ""

#: lib/statsample/test/umannwhitney.rb:155
msgid "Exact p (Dinneen & Blakesley, 1973):"
msgstr ""

#: lib/statsample/test/levene.rb:37
msgid "Levene Test"
msgstr ""

#: lib/statsample/test/bartlettsphericity.rb:25
msgid "Bartlett's test of sphericity"
msgstr ""

#: lib/statsample/regression/multiple/baseengine.rb:27
msgid "Multiple Regression:  %s over %s"
msgstr ""

#: lib/statsample/regression/multiple/baseengine.rb:40
msgid "Regression"
msgstr ""

#: lib/statsample/regression/multiple/baseengine.rb:40
msgid "Error"
msgstr ""

#: lib/statsample/regression/multiple/baseengine.rb:184
msgid "Engine: %s"
msgstr ""

#: lib/statsample/regression/multiple/baseengine.rb:185
msgid "Cases(listwise)=%d(%d)"
msgstr ""

#: lib/statsample/regression/multiple/baseengine.rb:186
msgid "R="
msgstr ""

#: lib/statsample/regression/multiple/baseengine.rb:187
msgid "R^2="
msgstr ""

#: lib/statsample/regression/multiple/baseengine.rb:188
msgid "R^2 Adj="
msgstr ""

#: lib/statsample/regression/multiple/baseengine.rb:189
msgid "Std.Error R="
msgstr ""

#: lib/statsample/regression/multiple/baseengine.rb:191
msgid "Equation"
msgstr ""

#: lib/statsample/regression/multiple/baseengine.rb:197
msgid "Beta coefficients"
msgstr ""

#: lib/statsample/regression/multiple/baseengine.rb:198
msgid "Constant"
msgstr ""

#: lib/statsample/regression/multiple/matrixengine.rb:78
msgid "Multiple reggresion of %s on %s"
msgstr ""

#: lib/statsample/regression/simple.rb:88
msgid "Regression of %s over %s"
msgstr ""

#: lib/statsample/regression/simple.rb:109 lib/statsample/factor/map.rb:105
#: lib/statsample/reliability/skillscaleanalysis.rb:92
msgid "Value"
msgstr ""

#: lib/statsample/regression/simple.rb:110
msgid "r"
msgstr ""

#: lib/statsample/regression/simple.rb:111
msgid "r^2"
msgstr ""

#: lib/statsample/regression/simple.rb:112
msgid "a"
msgstr ""

#: lib/statsample/regression/simple.rb:113
msgid "b"
msgstr ""

#: lib/statsample/regression/simple.rb:114
msgid "s.e"
msgstr ""

#: lib/statsample/dominanceanalysis/bootstrap.rb:115
msgid "Bootstrap dominance Analysis:  %s over %s"
msgstr ""

#: lib/statsample/dominanceanalysis/bootstrap.rb:138
msgid "Bootstrap %d of %d"
msgstr ""

#: lib/statsample/dominanceanalysis/bootstrap.rb:177
msgid "Sample size: %d\n"
msgstr ""

#: lib/statsample/dominanceanalysis/bootstrap.rb:179
msgid "Linear Regression Engine: %s"
msgstr ""

#: lib/statsample/dominanceanalysis/bootstrap.rb:181
msgid "pairs"
msgstr ""

#: lib/statsample/dominanceanalysis/bootstrap.rb:181
msgid "SE(Dij)"
msgstr ""

#: lib/statsample/dominanceanalysis/bootstrap.rb:181
msgid "Reproducibility"
msgstr ""

#: lib/statsample/dominanceanalysis/bootstrap.rb:182
msgid "Complete dominance"
msgstr ""

#: lib/statsample/dominanceanalysis/bootstrap.rb:190
msgid "Conditional dominance"
msgstr ""

#: lib/statsample/dominanceanalysis/bootstrap.rb:199
msgid "General Dominance"
msgstr ""

#: lib/statsample/dominanceanalysis/bootstrap.rb:208
msgid "General averages"
msgstr ""

#: lib/statsample/dominanceanalysis/bootstrap.rb:208
msgid "var"
msgstr ""

#: lib/statsample/dominanceanalysis/bootstrap.rb:208
msgid "se"
msgstr ""

#: lib/statsample/dominanceanalysis/bootstrap.rb:208
msgid "p.5"
msgstr ""

#: lib/statsample/dominanceanalysis/bootstrap.rb:208
msgid "p.95"
msgstr ""

#: lib/statsample/anova/twoway.rb:59
msgid "ANOVA Two-Way"
msgstr ""

#: lib/statsample/anova/twoway.rb:60
msgid "A"
msgstr ""

#: lib/statsample/anova/twoway.rb:61
msgid "B"
msgstr ""

#: lib/statsample/anova/twoway.rb:62
msgid "Within"
msgstr ""

#: lib/statsample/anova/twoway.rb:98 lib/statsample/anova/oneway.rb:57
msgid "%s Table"
msgstr ""

#: lib/statsample/anova/twoway.rb:103 lib/statsample/anova/oneway.rb:60
#: lib/statsample/crosstab.rb:101 lib/statsample/crosstab.rb:116
#: lib/statsample/crosstab.rb:151 lib/statsample/crosstab.rb:173
#: lib/statsample/dominanceanalysis.rb:354
msgid "Total"
msgstr ""

#: lib/statsample/anova/twoway.rb:172
msgid "Anova Two-Way on %s"
msgstr ""

#: lib/statsample/anova/twoway.rb:184 lib/statsample/anova/oneway.rb:127
msgid "Test of Homogeneity of variances (Levene)"
msgstr ""

#: lib/statsample/anova/twoway.rb:189 lib/statsample/anova/twoway.rb:193
msgid "%s Mean"
msgstr ""

#: lib/statsample/anova/oneway.rb:35
msgid "Explained variance"
msgstr ""

#: lib/statsample/anova/oneway.rb:36
msgid "Unexplained variance"
msgstr ""

#: lib/statsample/anova/oneway.rb:97
msgid "Anova One-Way"
msgstr ""

#: lib/statsample/anova/oneway.rb:98
msgid "Between Groups"
msgstr ""

#: lib/statsample/anova/oneway.rb:99
msgid "Within Groups"
msgstr ""

#: lib/statsample/anova/oneway.rb:119
msgid "Contrast for %s"
msgstr ""

#: lib/statsample/anova/oneway.rb:163
msgid "Descriptives"
msgstr ""

#: lib/statsample/anova/contrast.rb:13
msgid "Psi estimate"
msgstr ""

#: lib/statsample/anova/contrast.rb:14
msgid "Contrast"
msgstr ""

#: lib/statsample/anova/contrast.rb:73
msgid "Contrast:%s"
msgstr ""

#: lib/statsample/graph/scatterplot.rb:72
msgid "Scatterplot (%s - %s)"
msgstr ""

#: lib/statsample/graph/histogram.rb:50
msgid "Histograma (%s)"
msgstr ""

#: lib/statsample/graph/boxplot.rb:63
msgid "Boxplot"
msgstr ""

#: lib/statsample/bivariate/pearson.rb:32
msgid "Correlation (%s - %s)"
msgstr ""

#: lib/statsample/bivariate/pearson.rb:50
msgid "%s : r=%0.3f (t:%0.3f, g.l.=%d, p:%0.3f / %s tails)"
msgstr ""

#: lib/statsample/factor/parallelanalysis.rb:68
msgid "Parallel Analysis"
msgstr ""

#: lib/statsample/factor/parallelanalysis.rb:96
msgid "Bootstrap Method: %s"
msgstr ""

#: lib/statsample/factor/parallelanalysis.rb:97
msgid "Uses SMC: %s"
msgstr ""

#: lib/statsample/factor/parallelanalysis.rb:97
msgid "Yes"
msgstr ""

#: lib/statsample/factor/parallelanalysis.rb:97
msgid "No"
msgstr ""

#: lib/statsample/factor/parallelanalysis.rb:98
msgid "Correlation Matrix type : %s"
msgstr ""

#: lib/statsample/factor/parallelanalysis.rb:99
msgid "Number of variables: %d"
msgstr ""

#: lib/statsample/factor/parallelanalysis.rb:100
msgid "Number of cases: %d"
msgstr ""

#: lib/statsample/factor/parallelanalysis.rb:101
msgid "Number of iterations: %d"
msgstr ""

#: lib/statsample/factor/parallelanalysis.rb:103
#: lib/statsample/factor/parallelanalysis.rb:111
#: lib/statsample/factor/map.rb:105
msgid "Eigenvalues"
msgstr ""

#: lib/statsample/factor/parallelanalysis.rb:103
#: lib/statsample/factor/parallelanalysis.rb:111
msgid "generated eigenvalue"
msgstr ""

#: lib/statsample/factor/parallelanalysis.rb:110
msgid "Number or factors to preserve: %d"
msgstr ""

#: lib/statsample/factor/parallelanalysis.rb:111
msgid "data eigenvalue"
msgstr ""

#: lib/statsample/factor/parallelanalysis.rb:111
msgid "preserve?"
msgstr ""

#: lib/statsample/factor/map.rb:60
msgid "Velicer's MAP"
msgstr ""

#: lib/statsample/factor/map.rb:110
msgid "Velicer's Average Squared Correlations"
msgstr ""

#: lib/statsample/factor/map.rb:110
msgid "number of components"
msgstr ""

#: lib/statsample/factor/map.rb:110
msgid "average square correlation"
msgstr ""

#: lib/statsample/factor/map.rb:115
msgid "The smallest average squared correlation is : %0.6f"
msgstr ""

#: lib/statsample/factor/map.rb:116
msgid "The number of components is : %d"
msgstr ""

#: lib/statsample/factor/pca.rb:56
msgid "Principal Component Analysis"
msgstr ""

#: lib/statsample/factor/pca.rb:59 lib/statsample/matrix.rb:14
#: lib/statsample/matrix.rb:81
msgid "VAR_%d"
msgstr ""

#: lib/statsample/factor/pca.rb:160
msgid "Component matrix (from covariance)"
msgstr ""

#: lib/statsample/factor/pca.rb:181
msgid "Component matrix"
msgstr ""

#: lib/statsample/factor/pca.rb:215 lib/statsample/factor/principalaxis.rb:200
msgid "Number of factors: %d"
msgstr ""

#: lib/statsample/factor/pca.rb:216 lib/statsample/factor/principalaxis.rb:202
msgid "Communalities"
msgstr ""

#: lib/statsample/factor/pca.rb:216 lib/statsample/factor/principalaxis.rb:202
msgid "Initial"
msgstr ""

#: lib/statsample/factor/pca.rb:216 lib/statsample/factor/principalaxis.rb:202
msgid "Extraction"
msgstr ""

#: lib/statsample/factor/pca.rb:216 lib/statsample/factor/pca.rb:223
#: lib/statsample/reliability/skillscaleanalysis.rb:92
msgid "%"
msgstr ""

#: lib/statsample/factor/pca.rb:223
msgid "Total Variance Explained"
msgstr ""

#: lib/statsample/factor/pca.rb:223
msgid "Component"
msgstr ""

#: lib/statsample/factor/pca.rb:223
msgid "E.Total"
msgstr ""

#: lib/statsample/factor/pca.rb:223
msgid "Cum. %"
msgstr ""

#: lib/statsample/factor/pca.rb:227
msgid "Component %d"
msgstr ""

#: lib/statsample/factor/principalaxis.rb:70
msgid "Variable %d"
msgstr ""

#: lib/statsample/factor/principalaxis.rb:147
msgid "Factor Matrix"
msgstr ""

#: lib/statsample/factor/principalaxis.rb:201
msgid "Iterations: %d"
msgstr ""

#: lib/statsample/factor/principalaxis.rb:207
msgid "Total Variance"
msgstr ""

#: lib/statsample/factor/principalaxis.rb:207
msgid "Factor"
msgstr ""

#: lib/statsample/factor/principalaxis.rb:207
msgid "I.E.Total"
msgstr ""

#: lib/statsample/factor/principalaxis.rb:207
msgid "I.E. %"
msgstr ""

#: lib/statsample/factor/principalaxis.rb:207
msgid "I.E.Cum. %"
msgstr ""

#: lib/statsample/factor/principalaxis.rb:208
msgid "S.L.Total"
msgstr ""

#: lib/statsample/factor/principalaxis.rb:208
msgid "S.L. %"
msgstr ""

#: lib/statsample/factor/principalaxis.rb:208
msgid "S.L.Cum. %"
msgstr ""

#: lib/statsample/factor/principalaxis.rb:215
msgid "Factor %d"
msgstr ""

#: lib/statsample/factor/rotation.rb:35
msgid "%s rotation"
msgstr ""

#: lib/statsample/factor/rotation.rb:132
msgid "Rotated Component matrix"
msgstr ""

#: lib/statsample/factor/rotation.rb:149
msgid "Component transformation matrix"
msgstr ""

#: lib/statsample/reliability/multiscaleanalysis.rb:67
msgid "Multiple Scale analysis"
msgstr ""

#: lib/statsample/reliability/multiscaleanalysis.rb:97
msgid "Scale %s"
msgstr ""

#: lib/statsample/reliability/multiscaleanalysis.rb:145
msgid "Reliability analysis of scales"
msgstr ""

#: lib/statsample/reliability/multiscaleanalysis.rb:151
msgid "Correlation matrix for %s"
msgstr ""

#: lib/statsample/reliability/multiscaleanalysis.rb:156
msgid "PCA for %s"
msgstr ""

#: lib/statsample/reliability/multiscaleanalysis.rb:161
msgid "Principal Axis for %s"
msgstr ""

#: lib/statsample/reliability/multiscaleanalysis.rb:167
msgid "Parallel Analysis for %s"
msgstr ""

#: lib/statsample/reliability/multiscaleanalysis.rb:172
msgid "MAP for %s"
msgstr ""

#: lib/statsample/reliability/skillscaleanalysis.rb:21
msgid "Skill Scale Reliability Analysis (%s)"
msgstr ""

#: lib/statsample/reliability/skillscaleanalysis.rb:36
msgid "%s(corrected)"
msgstr ""

#: lib/statsample/reliability/skillscaleanalysis.rb:40
msgid "Corrected dataset from %s"
msgstr ""

#: lib/statsample/reliability/skillscaleanalysis.rb:51
msgid "%s (Scale Analysis)"
msgstr ""

#: lib/statsample/reliability/skillscaleanalysis.rb:82
msgid "Problematic Items"
msgstr ""

#: lib/statsample/reliability/skillscaleanalysis.rb:87
msgid "Item: %s"
msgstr ""

#: lib/statsample/reliability/skillscaleanalysis.rb:88
msgid "Correct answer: %s"
msgstr ""

#: lib/statsample/reliability/skillscaleanalysis.rb:89
msgid "p: %0.3f"
msgstr ""

#: lib/statsample/reliability/skillscaleanalysis.rb:101
msgid "No problematic items"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:44
msgid "Reliability Analisis"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:157
msgid "Summary for %s with all items"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:158
msgid "Items"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:159
#: lib/statsample/reliability/scaleanalysis.rb:176
msgid "Sum mean"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:160
msgid "S.d. mean"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:162
msgid "Deleted items"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:172
msgid "Summary for %s"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:173
msgid "Valid Items"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:175
msgid "Valid cases"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:177
msgid "Sum sd"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:179
msgid "Sum median"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:181
msgid "Item mean"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:182
msgid "Item sd"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:184
msgid "Skewness"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:185
msgid "Kurtosis"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:187
msgid "Cronbach's alpha"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:188
msgid "Standarized Cronbach's alpha"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:189
msgid "Mean rpb"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:191
msgid "Variances mean"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:192
msgid "Covariances mean"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:196
msgid "Items for obtain alpha(0.8) : %d"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:197
msgid "Items for obtain alpha(0.9) : %d"
msgstr ""

#: lib/statsample/reliability/scaleanalysis.rb:205
msgid "Items report for %s"
msgstr ""

#: lib/statsample/reliability/icc.rb:114
msgid "Shrout & Fleiss ICC(1,1)"
msgstr ""

#: lib/statsample/reliability/icc.rb:119
msgid "Shrout & Fleiss ICC(2,1)"
msgstr ""

#: lib/statsample/reliability/icc.rb:125
msgid "Shrout & Fleiss ICC(3,1)"
msgstr ""

#: lib/statsample/reliability/icc.rb:132
msgid "Shrout & Fleiss ICC(1,k)"
msgstr ""

#: lib/statsample/reliability/icc.rb:138
msgid "Shrout & Fleiss ICC(2,k)"
msgstr ""

#: lib/statsample/reliability/icc.rb:145
msgid "Shrout & Fleiss ICC(3,k)"
msgstr ""

#: lib/statsample/reliability/icc.rb:153
msgid "McGraw & Wong ICC(1)"
msgstr ""

#: lib/statsample/reliability/icc.rb:159
msgid "McGraw & Wong ICC(K)"
msgstr ""

#: lib/statsample/reliability/icc.rb:165
msgid "McGraw & Wong ICC(C,1)"
msgstr ""

#: lib/statsample/reliability/icc.rb:172
msgid "McGraw & Wong ICC(C,K)"
msgstr ""

#: lib/statsample/reliability/icc.rb:179
msgid "McGraw & Wong ICC(A,1)"
msgstr ""

#: lib/statsample/reliability/icc.rb:186
msgid "McGraw & Wong ICC(A,K)"
msgstr ""

#: lib/statsample/reliability/icc.rb:408
msgid "ICC: %0.4f"
msgstr ""

#: lib/statsample/reliability/icc.rb:410
msgid "CI (%0.2f): [%0.4f - %0.4f]"
msgstr ""

#: lib/statsample/crosstab.rb:22
msgid "Crosstab %s - %s"
msgstr ""

#: lib/statsample/crosstab.rb:98
msgid "Rows: %s"
msgstr ""

#: lib/statsample/crosstab.rb:99
msgid "Columns: %s"
msgstr ""

#: lib/statsample/crosstab.rb:101
msgid "Raw"
msgstr ""

#: lib/statsample/crosstab.rb:146
msgid "% Row"
msgstr ""

#: lib/statsample/crosstab.rb:147
msgid "% Column"
msgstr ""

#: lib/statsample/crosstab.rb:148
msgid "% Total"
msgstr ""

#: lib/statsample/dominanceanalysis.rb:121
msgid "Dominance Analysis:  %s over %s"
msgstr ""

#: lib/statsample/dominanceanalysis.rb:315
msgid "sign"
msgstr ""

#: lib/statsample/dominanceanalysis.rb:317
msgid "Dominance Analysis result"
msgstr ""

#: lib/statsample/dominanceanalysis.rb:318
msgid "Model 0"
msgstr ""

#: lib/statsample/dominanceanalysis.rb:333
msgid "k=%d Average"
msgstr ""

#: lib/statsample/dominanceanalysis.rb:345
msgid "Overall averages"
msgstr ""

#: lib/statsample/dominanceanalysis.rb:354
msgid "Pairwise dominance"
msgstr ""

#: lib/statsample/dominanceanalysis.rb:354
msgid "Pairs"
msgstr ""

#: lib/statsample/dominanceanalysis.rb:354
msgid "Conditional"
msgstr ""

#: lib/statsample/dominanceanalysis.rb:354
msgid "General"
msgstr ""

#: lib/statsample/matrix.rb:181
msgid "X%d"
msgstr ""

#: lib/statsample/matrix.rb:184
msgid "Y%d"
msgstr ""

#: lib/statsample/matrix.rb:196
msgid "Matrix %d"
msgstr ""

#: lib/statsample/matrix.rb:255
msgid "Covariate matrix %d"
msgstr ""

#: lib/statsample/matrix.rb:303
msgid "Correlation"
msgstr ""

#: lib/statsample/matrix.rb:303
msgid "Covariance"
msgstr ""

#: lib/statsample/matrix.rb:303
msgid " Matrix"
msgstr ""

#: lib/statsample/vector.rb:177
msgid "%s(standarized)"
msgstr ""

#: lib/statsample/vector.rb:189
msgid "%s(centered)"
msgstr ""

#: lib/statsample/vector.rb:201
msgid "%s(percentil)"
msgstr ""

#: lib/statsample/vector.rb:778
msgid "n :%d"
msgstr ""

#: lib/statsample/vector.rb:779
msgid "n valid:%d"
msgstr ""

#: lib/statsample/vector.rb:780
msgid "factors:%s"
msgstr ""

#: lib/statsample/vector.rb:781
msgid "mode: %s"
msgstr ""

#: lib/statsample/vector.rb:782
msgid "Distribution"
msgstr ""

#: lib/statsample/vector.rb:788
msgid "median: %s"
msgstr ""

#: lib/statsample/vector.rb:790
msgid "mean: %0.4f"
msgstr ""

#: lib/statsample/vector.rb:791
msgid "sd: %0.4f"
msgstr ""

#: lib/statsample/dataset.rb:161
msgid "Dataset %d"
msgstr ""

#: lib/statsample/dataset.rb:457
msgid "Sum from %s"
msgstr ""

#: lib/statsample/dataset.rb:510
msgid "Means from %s"
msgstr ""

#: lib/statsample/dataset.rb:734
msgid "%s(filtered)"
msgstr ""

#: lib/statsample/dataset.rb:956
msgid "Cases: %d"
msgstr ""


================================================
FILE: references.txt
================================================
References
* Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. <em>Psychological Methods, 8</em>(2), 129-148.
* Azen, R. & Budescu, D.V. (2006). Comparing predictors in Multivariate Regression Models: An extension of Dominance Analysis. <em>Journal of Educational and Behavioral Statistics, 31</em>(2), 157-180.
* Budescu, D. V. (1993). Dominance analysis: a new approach to the problem of relative importance of predictors in multiple regression. <em>Psychological Bulletin, 114</em>, 542-551.
* Cochran, W.(1972). Sampling Techniques [spanish edition].
* Cohen et al. (2003). Applied Multiple Regression / Correlation Analysis for the Behavioral Sciences
* Dinneen, L., & Blakesley, B. (1973). Algorithm AS 62: A Generator for the Sampling Distribution of the Mann-Whitney U Statistic. <em>Journal of the Royal Statistical Society, 22</em>(2), 269-273
* Dziuban, C., & Shirkey E. (1974). When is a correlation matrix appropriate for factor analysis? Some decision rules. Psychological Bulletin, 81(6), 358-361.
* Hayton, J., Allen, D. & Scarpello, V.(2004). Factor Retention Decisions in Exploratory Factor Analysis: a Tutorial on Parallel Analysis. <i>Organizational Research Methods, 7</i> (2), 191-205.
* Härdle, W. & Simar, L. (2003). Applied Multivariate Statistical Analysis. Springer
* Leach, L. & Henson, R. (2007). The Use and Impact of Adjusted R2 Effects in Published Regression Research. Multiple Linear Regression Viewpoints, 33(1), 1-11.
* Lin, J. (2007). VARIMAX_K58 [Source code]. [http://www.johnny-lin.com/idl_code/varimax_k58.pro]
* Liu, O., & Rijmen, F. (2008). A modified procedure for parallel analysis of ordered categorical data. Behavior Research Methods, 40(2), 556-562.
* McGraw, K. & Wong, S.P. (1996). Forming Inferences About Some Intraclass Correlation Coefficients. Psychological methods, 1(1), 30-46.
* O'Connor, B. (2000). SPSS and SAS programs for determining the number of components using parallel analysis and Velicer's MAP test. Behavior Research Methods, Instruments, & Computers, 32(3), 396-402.
* SPSS Manual
* Sawyer, S. (2005). Resampling Data: Using a Statistical Jackknife.
* Shrout,P. & Fleiss, J. (1979). Intraclass Correlation: Uses in assessing rater reliability. Psychological Bulletin, 86(2), 420-428
* Smith, L. (2002). A tutorial on Principal Component Analysis. Available on http://courses.eas.ualberta.ca/eas570/pca_tutorial.pdf 
* http://en.wikipedia.org/wiki/Welch-Satterthwaite_equation
* http://europe.isixsigma.com/library/content/c080806a.asp
* http://stattrek.com/Lesson6/SRS.aspx
* http://talkstats.com/showthread.php?t=5056
* http://www.gnu.org/software/gsl/manual/html_node/The-histogram-struct.html


================================================
FILE: setup.rb
================================================
#
# setup.rb
#
# Copyright (c) 2000-2005 Minero Aoki
#
# This program is free software.
# You can distribute/modify this program under the terms of
# the GNU LGPL, Lesser General Public License version 2.1.
#

unless Enumerable.method_defined?(:map)   # Ruby 1.4.6
  module Enumerable
    alias map collect
  end
end

unless File.respond_to?(:read)   # Ruby 1.6
  def File.read(fname)
    open(fname) {|f|
      return f.read
    }
  end
end

unless Errno.const_defined?(:ENOTEMPTY)   # Windows?
  module Errno
    class ENOTEMPTY
      # We do not raise this exception, implementation is not needed.
    end
  end
end

def File.binread(fname)
  open(fname, 'rb') {|f|
    return f.read
  }
end

# for corrupted Windows' stat(2)
def File.dir?(path)
  File.directory?((path[-1,1] == '/') ? path : path + '/')
end


class ConfigTable

  include Enumerable

  def initialize(rbconfig)
    @rbconfig = rbconfig
    @items = []
    @table = {}
    # options
    @install_prefix = nil
    @config_opt = nil
    @verbose = true
    @no_harm = false
  end

  attr_accessor :install_prefix
  attr_accessor :config_opt

  attr_writer :verbose

  def verbose?
    @verbose
  end

  attr_writer :no_harm

  def no_harm?
    @no_harm
  end

  def [](key)
    lookup(key).resolve(self)
  end

  def []=(key, val)
    lookup(key).set val
  end

  def names
    @items.map {|i| i.name }
  end

  def each(&block)
    @items.each(&block)
  end

  def key?(name)
    @table.key?(name)
  end

  def lookup(name)
    @table[name] or setup_rb_error "no such config item: #{name}"
  end

  def add(item)
    @items.push item
    @table[item.name] = item
  end

  def remove(name)
    item = lookup(name)
    @items.delete_if {|i| i.name == name }
    @table.delete_if {|name, i| i.name == name }
    item
  end

  def load_script(path, inst = nil)
    if File.file?(path)
      MetaConfigEnvironment.new(self, inst).instance_eval File.read(path), path
    end
  end

  def savefile
    '.config'
  end

  def load_savefile
    begin
      File.foreach(savefile()) do |line|
        k, v = *line.split(/=/, 2)
        self[k] = v.strip
      end
    rescue Errno::ENOENT
      setup_rb_error $!.message + "\n#{File.basename($0)} config first"
    end
  end

  def save
    @items.each {|i| i.value }
    File.open(savefile(), 'w') {|f|
      @items.each do |i|
        f.printf "%s=%s\n", i.name, i.value if i.value? and i.value
      end
    }
  end

  def load_standard_entries
    standard_entries(@rbconfig).each do |ent|
      add ent
    end
  end

  def standard_entries(rbconfig)
    c = rbconfig

    rubypath = File.join(c['bindir'], c['ruby_install_name'] + c['EXEEXT'])

    major = c['MAJOR'].to_i
    minor = c['MINOR'].to_i
    teeny = c['TEENY'].to_i
    version = "#{major}.#{minor}"

    # ruby ver. >= 1.4.4?
    newpath_p = ((major >= 2) or
                 ((major == 1) and
                  ((minor >= 5) or
                   ((minor == 4) and (teeny >= 4)))))

    if c['rubylibdir']
      # V > 1.6.3
      libruby         = "#{c['prefix']}/lib/ruby"
      librubyver      = c['rubylibdir']
      librubyverarch  = c['archdir']
      siteruby        = c['sitedir']
      siterubyver     = c['sitelibdir']
      siterubyverarch = c['sitearchdir']
    elsif newpath_p
      # 1.4.4 <= V <= 1.6.3
      libruby         = "#{c['prefix']}/lib/ruby"
      librubyver      = "#{c['prefix']}/lib/ruby/#{version}"
      librubyverarch  = "#{c['prefix']}/lib/ruby/#{version}/#{c['arch']}"
      siteruby        = c['sitedir']
      siterubyver     = "$siteruby/#{version}"
      siterubyverarch = "$siterubyver/#{c['arch']}"
    else
      # V < 1.4.4
      libruby         = "#{c['prefix']}/lib/ruby"
      librubyver      = "#{c['prefix']}/lib/ruby/#{version}"
      librubyverarch  = "#{c['prefix']}/lib/ruby/#{version}/#{c['arch']}"
      siteruby        = "#{c['prefix']}/lib/ruby/#{version}/site_ruby"
      siterubyver     = siteruby
      siterubyverarch = "$siterubyver/#{c['arch']}"
    end
    parameterize = lambda {|path|
      path.sub(/\A#{Regexp.quote(c['prefix'])}/, '$prefix')
    }

    if arg = c['configure_args'].split.detect {|arg| /--with-make-prog=/ =~ arg }
      makeprog = arg.sub(/'/, '').split(/=/, 2)[1]
    else
      makeprog = 'make'
    end

    [
      ExecItem.new('installdirs', 'std/site/home',
                   'std: install under libruby; site: install under site_ruby; home: install under $HOME')\
          {|val, table|
            case val
            when 'std'
              table['rbdir'] = '$librubyver'
              table['sodir'] = '$librubyverarch'
            when 'site'
              table['rbdir'] = '$siterubyver'
              table['sodir'] = '$siterubyverarch'
            when 'home'
              setup_rb_error '$HOME was not set' unless ENV['HOME']
              table['prefix'] = ENV['HOME']
              table['rbdir'] = '$libdir/ruby'
              table['sodir'] = '$libdir/ruby'
            end
          },
      PathItem.new('prefix', 'path', c['prefix'],
                   'path prefix of target environment'),
      PathItem.new('bindir', 'path', parameterize.call(c['bindir']),
                   'the directory for commands'),
      PathItem.new('libdir', 'path', parameterize.call(c['libdir']),
                   'the directory for libraries'),
      PathItem.new('datadir', 'path', parameterize.call(c['datadir']),
                   'the directory for shared data'),
      PathItem.new('mandir', 'path', parameterize.call(c['mandir']),
                   'the directory for man pages'),
      PathItem.new('sysconfdir', 'path', parameterize.call(c['sysconfdir']),
                   'the directory for system configuration files'),
      PathItem.new('localstatedir', 'path', parameterize.call(c['localstatedir']),
                   'the directory for local state data'),
      PathItem.new('libruby', 'path', libruby,
                   'the directory for ruby libraries'),
      PathItem.new('librubyver', 'path', librubyver,
                   'the directory for standard ruby libraries'),
      PathItem.new('librubyverarch', 'path', librubyverarch,
                   'the directory for standard ruby extensions'),
      PathItem.new('siteruby', 'path', siteruby,
          'the directory for version-independent aux ruby libraries'),
      PathItem.new('siterubyver', 'path', siterubyver,
                   'the directory for aux ruby libraries'),
      PathItem.new('siterubyverarch', 'path', siterubyverarch,
                   'the directory for aux ruby binaries'),
      PathItem.new('rbdir', 'path', '$siterubyver',
                   'the directory for ruby scripts'),
      PathItem.new('sodir', 'path', '$siterubyverarch',
                   'the directory for ruby extentions'),
      PathItem.new('rubypath', 'path', rubypath,
                   'the path to set to #! line'),
      ProgramItem.new('rubyprog', 'name', rubypath,
                      'the ruby program using for installation'),
      ProgramItem.new('makeprog', 'name', makeprog,
                      'the make program to compile ruby extentions'),
      SelectItem.new('shebang', 'all/ruby/never', 'ruby',
                     'shebang line (#!) editing mode'),
      BoolItem.new('without-ext', 'yes/no', 'no',
                   'does not compile/install ruby extentions')
    ]
  end
  private :standard_entries

  def load_multipackage_entries
    multipackage_entries().each do |ent|
      add ent
    end
  end

  def multipackage_entries
    [
      PackageSelectionItem.new('with', 'name,name...', '', 'ALL',
                               'package names that you want to install'),
      PackageSelectionItem.new('without', 'name,name...', '', 'NONE',
                               'package names that you do not want to install')
    ]
  end
  private :multipackage_entries

  ALIASES = {
    'std-ruby'         => 'librubyver',
    'stdruby'          => 'librubyver',
    'rubylibdir'       => 'librubyver',
    'archdir'          => 'librubyverarch',
    'site-ruby-common' => 'siteruby',     # For backward compatibility
    'site-ruby'        => 'siterubyver',  # For backward compatibility
    'bin-dir'          => 'bindir',
    'bin-dir'          => 'bindir',
    'rb-dir'           => 'rbdir',
    'so-dir'           => 'sodir',
    'data-dir'         => 'datadir',
    'ruby-path'        => 'rubypath',
    'ruby-prog'        => 'rubyprog',
    'ruby'             => 'rubyprog',
    'make-prog'        => 'makeprog',
    'make'             => 'makeprog'
  }

  def fixup
    ALIASES.each do |ali, name|
      @table[ali] = @table[name]
    end
    @items.freeze
    @table.freeze
    @options_re = /\A--(#{@table.keys.join('|')})(?:=(.*))?\z/
  end

  def parse_opt(opt)
    m = @options_re.match(opt) or setup_rb_error "config: unknown option #{opt}"
    m.to_a[1,2]
  end

  def dllext
    @rbconfig['DLEXT']
  end

  def value_config?(name)
    lookup(name).value?
  end

  class Item
    def initialize(name, template, default, desc)
      @name = name.freeze
      @template = template
      @value = default
      @default = default
      @description = desc
    end

    attr_reader :name
    attr_reader :description

    attr_accessor :default
    alias help_default default

    def help_opt
      "--#{@name}=#{@template}"
    end

    def value?
      true
    end

    def value
      @value
    end

    def resolve(table)
      @value.gsub(%r<\$([^/]+)>) { table[$1] }
    end

    def set(val)
      @value = check(val)
    end

    private

    def check(val)
      setup_rb_error "config: --#{name} requires argument" unless val
      val
    end
  end

  class BoolItem < Item
    def config_type
      'bool'
    end

    def help_opt
      "--#{@name}"
    end

    private

    def check(val)
      return 'yes' unless val
      case val
      when /\Ay(es)?\z/i, /\At(rue)?\z/i then 'yes'
      when /\An(o)?\z/i, /\Af(alse)\z/i  then 'no'
      else
        setup_rb_error "config: --#{@name} accepts only yes/no for argument"
      end
    end
  end

  class PathItem < Item
    def config_type
      'path'
    end

    private

    def check(path)
      setup_rb_error "config: --#{@name} requires argument"  unless path
      path[0,1] == '$' ? path : File.expand_path(path)
    end
  end

  class ProgramItem < Item
    def config_type
      'program'
    end
  end

  class SelectItem < Item
    def initialize(name, selection, default, desc)
      super
      @ok = selection.split('/')
    end

    def config_type
      'select'
    end

    private

    def check(val)
      unless @ok.include?(val.strip)
        setup_rb_error "config: use --#{@name}=#{@template} (#{val})"
      end
      val.strip
    end
  end

  class ExecItem < Item
    def initialize(name, selection, desc, &block)
      super name, selection, nil, desc
      @ok = selection.split('/')
      @action = block
    end

    def config_type
      'exec'
    end

    def value?
      false
    end

    def resolve(table)
      setup_rb_error "$#{name()} wrongly used as option value"
    end

    undef set

    def evaluate(val, table)
      v = val.strip.downcase
      unless @ok.include?(v)
        setup_rb_error "invalid option --#{@name}=#{val} (use #{@template})"
      end
      @action.call v, table
    end
  end

  class PackageSelectionItem < Item
    def initialize(name, template, default, help_default, desc)
      super name, template, default, desc
      @help_default = help_default
    end

    attr_reader :help_default

    def config_type
      'package'
    end

    private

    def check(val)
      unless File.dir?("packages/#{val}")
        setup_rb_error "config: no such package: #{val}"
      end
      val
    end
  end

  class MetaConfigEnvironment
    def initialize(config, installer)
      @config = config
      @installer = installer
    end

    def config_names
      @config.names
    end

    def config?(name)
      @config.key?(name)
    end

    def bool_config?(name)
      @config.lookup(name).config_type == 'bool'
    end

    def path_config?(name)
      @config.lookup(name).config_type == 'path'
    end

    def value_config?(name)
      @config.lookup(name).config_type != 'exec'
    end

    def add_config(item)
      @config.add item
    end

    def add_bool_config(name, default, desc)
      @config.add BoolItem.new(name, 'yes/no', default ? 'yes' : 'no', desc)
    end

    def add_path_config(name, default, desc)
      @config.add PathItem.new(name, 'path', default, desc)
    end

    def set_config_default(name, default)
      @config.lookup(name).default = default
    end

    def remove_config(name)
      @config.remove(name)
    end

    # For only multipackage
    def packages
      raise '[setup.rb fatal] multi-package metaconfig API packages() called for single-package; contact application package vendor' unless @installer
      @installer.packages
    end

    # For only multipackage
    def declare_packages(list)
      raise '[setup.rb fatal] multi-package metaconfig API declare_packages() called for single-package; contact application package vendor' unless @installer
      @installer.packages = list
    end
  end

end   # class ConfigTable


# This module requires: #verbose?, #no_harm?
module FileOperations

  def mkdir_p(dirname, prefix = nil)
    dirname = prefix + File.expand_path(dirname) if prefix
    $stderr.puts "mkdir -p #{dirname}" if verbose?
    return if no_harm?

    # Does not check '/', it's too abnormal.
    dirs = File.expand_path(dirname).split(%r<(?=/)>)
    if /\A[a-z]:\z/i =~ dirs[0]
      disk = dirs.shift
      dirs[0] = disk + dirs[0]
    end
    dirs.each_index do |idx|
      path = dirs[0..idx].join('')
      Dir.mkdir path unless File.dir?(path)
    end
  end

  def rm_f(path)
    $stderr.puts "rm -f #{path}" if verbose?
    return if no_harm?
    force_remove_file path
  end

  def rm_rf(path)
    $stderr.puts "rm -rf #{path}" if verbose?
    return if no_harm?
    remove_tree path
  end

  def remove_tree(path)
    if File.symlink?(path)
      remove_file path
    elsif File.dir?(path)
      remove_tree0 path
    else
      force_remove_file path
    end
  end

  def remove_tree0(path)
    Dir.foreach(path) do |ent|
      next if ent == '.'
      next if ent == '..'
      entpath = "#{path}/#{ent}"
      if File.symlink?(entpath)
        remove_file entpath
      elsif File.dir?(entpath)
        remove_tree0 entpath
      else
        force_remove_file entpath
      end
    end
    begin
      Dir.rmdir path
    rescue Errno::ENOTEMPTY
      # directory may not be empty
    end
  end

  def move_file(src, dest)
    force_remove_file dest
    begin
      File.rename src, dest
    rescue
      File.open(dest, 'wb') {|f|
        f.write File.binread(src)
      }
      File.chmod File.stat(src).mode, dest
      File.unlink src
    end
  end

  def force_remove_file(path)
    begin
      remove_file path
    rescue
    end
  end

  def remove_file(path)
    File.chmod 0777, path
    File.unlink path
  end

  def install(from, dest, mode, prefix = nil)
    $stderr.puts "install #{from} #{dest}" if verbose?
    return if no_harm?

    realdest = prefix ? prefix + File.expand_path(dest) : dest
    realdest = File.join(realdest, File.basename(from)) if File.dir?(realdest)
    str = File.binread(from)
    if diff?(str, realdest)
      verbose_off {
        rm_f realdest if File.exist?(realdest)
      }
      File.open(realdest, 'wb') {|f|
        f.write str
      }
      File.chmod mode, realdest

      File.open("#{objdir_root()}/InstalledFiles", 'a') {|f|
        if prefix
          f.puts realdest.sub(prefix, '')
        else
          f.puts realdest
        end
      }
    end
  end

  def diff?(new_content, path)
    return true unless File.exist?(path)
    new_content != File.binread(path)
  end

  def command(*args)
    $stderr.puts args.join(' ') if verbose?
    system(*args) or raise RuntimeError,
        "system(#{args.map{|a| a.inspect }.join(' ')}) failed"
  end

  def ruby(*args)
    command config('rubyprog'), *args
  end
  
  def make(task = nil)
    command(*[config('makeprog'), task].compact)
  end

  def extdir?(dir)
    File.exist?("#{dir}/MANIFEST") or File.exist?("#{dir}/extconf.rb")
  end

  def files_of(dir)
    Dir.open(dir) {|d|
      return d.select {|ent| File.file?("#{dir}/#{ent}") }
    }
  end

  DIR_REJECT = %w( . .. CVS SCCS RCS CVS.adm .svn )

  def directories_of(dir)
    Dir.open(dir) {|d|
      return d.select {|ent| File.dir?("#{dir}/#{ent}") } - DIR_REJECT
    }
  end

end


# This module requires: #srcdir_root, #objdir_root, #relpath
module HookScriptAPI

  def get_config(key)
    @config[key]
  end

  alias config get_config

  # obsolete: use metaconfig to change configuration
  def set_config(key, val)
    @config[key] = val
  end

  #
  # srcdir/objdir (works only in the package directory)
  #

  def curr_srcdir
    "#{srcdir_root()}/#{relpath()}"
  end

  def curr_objdir
    "#{objdir_root()}/#{relpath()}"
  end

  def srcfile(path)
    "#{curr_srcdir()}/#{path}"
  end

  def srcexist?(path)
    File.exist?(srcfile(path))
  end

  def srcdirectory?(path)
    File.dir?(srcfile(path))
  end
  
  def srcfile?(path)
    File.file?(srcfile(path))
  end

  def srcentries(path = '.')
    Dir.open("#{curr_srcdir()}/#{path}") {|d|
      return d.to_a - %w(. ..)
    }
  end

  def srcfiles(path = '.')
    srcentries(path).select {|fname|
      File.file?(File.join(curr_srcdir(), path, fname))
    }
  end

  def srcdirectories(path = '.')
    srcentries(path).select {|fname|
      File.dir?(File.join(curr_srcdir(), path, fname))
    }
  end

end


class ToplevelInstaller

  Version   = '3.4.1'
  Copyright = 'Copyright (c) 2000-2005 Minero Aoki'

  TASKS = [
    [ 'all',      'do config, setup, then install' ],
    [ 'config',   'saves your configurations' ],
    [ 'show',     'shows current configuration' ],
    [ 'setup',    'compiles ruby extentions and others' ],
    [ 'install',  'installs files' ],
    [ 'test',     'run all tests in test/' ],
    [ 'clean',    "does `make clean' for each extention" ],
    [ 'distclean',"does `make distclean' for each extention" ]
  ]

  def ToplevelInstaller.invoke
    config = ConfigTable.new(load_rbconfig())
    config.load_standard_entries
    config.load_multipackage_entries if multipackage?
    config.fixup
    klass = (multipackage?() ? ToplevelInstallerMulti : ToplevelInstaller)
    klass.new(File.dirname($0), config).invoke
  end

  def ToplevelInstaller.multipackage?
    File.dir?(File.dirname($0) + '/packages')
  end

  def ToplevelInstaller.load_rbconfig
    if arg = ARGV.detect {|arg| /\A--rbconfig=/ =~ arg }
      ARGV.delete(arg)
      load File.expand_path(arg.split(/=/, 2)[1])
      $".push 'rbconfig.rb'
    else
      require 'rbconfig'
    end
    ::Config::CONFIG
  end

  def initialize(ardir_root, config)
    @ardir = File.expand_path(ardir_root)
    @config = config
    # cache
    @valid_task_re = nil
  end

  def config(key)
    @config[key]
  end

  def inspect
    "#<#{self.class} #{__id__()}>"
  end

  def invoke
    run_metaconfigs
    case task = parsearg_global()
    when nil, 'all'
      parsearg_config
      init_installers
      exec_config
      exec_setup
      exec_install
    else
      case task
      when 'config', 'test'
        ;
      when 'clean', 'distclean'
        @config.load_savefile if File.exist?(@config.savefile)
      else
        @config.load_savefile
      end
      __send__ "parsearg_#{task}"
      init_installers
      __send__ "exec_#{task}"
    end
  end
  
  def run_metaconfigs
    @config.load_script "#{@ardir}/metaconfig"
  end

  def init_installers
    @installer = Installer.new(@config, @ardir, File.expand_path('.'))
  end

  #
  # Hook Script API bases
  #

  def srcdir_root
    @ardir
  end

  def objdir_root
    '.'
  end

  def relpath
    '.'
  end

  #
  # Option Parsing
  #

  def parsearg_global
    while arg = ARGV.shift
      case arg
      when /\A\w+\z/
        setup_rb_error "invalid task: #{arg}" unless valid_task?(arg)
        return arg
      when '-q', '--quiet'
        @config.verbose = false
      when '--verbose'
        @config.verbose = true
      when '--help'
        print_usage $stdout
        exit 0
      when '--version'
        puts "#{File.basename($0)} version #{Version}"
        exit 0
      when '--copyright'
        puts Copyright
        exit 0
      else
        setup_rb_error "unknown global option '#{arg}'"
      end
    end
    nil
  end

  def valid_task?(t)
    valid_task_re() =~ t
  end

  def valid_task_re
    @valid_task_re ||= /\A(?:#{TASKS.map {|task,desc| task }.join('|')})\z/
  end

  def parsearg_no_options
    unless ARGV.empty?
      task = caller(0).first.slice(%r<`parsearg_(\w+)'>, 1)
      setup_rb_error "#{task}: unknown options: #{ARGV.join(' ')}"
    end
  end

  alias parsearg_show       parsearg_no_options
  alias parsearg_setup      parsearg_no_options
  alias parsearg_test       parsearg_no_options
  alias parsearg_clean      parsearg_no_options
  alias parsearg_distclean  parsearg_no_options

  def parsearg_config
    evalopt = []
    set = []
    @config.config_opt = []
    while i = ARGV.shift
      if /\A--?\z/ =~ i
        @config.config_opt = ARGV.dup
        break
      end
      name, value = *@config.parse_opt(i)
      if @config.value_config?(name)
        @config[name] = value
      else
        evalopt.push [name, value]
      end
      set.push name
    end
    evalopt.each do |name, value|
      @config.lookup(name).evaluate value, @config
    end
    # Check if configuration is valid
    set.each do |n|
      @config[n] if @config.value_config?(n)
    end
  end

  def parsearg_install
    @config.no_harm = false
    @config.install_prefix = ''
    while a = ARGV.shift
      case a
      when '--no-harm'
        @config.no_harm = true
      when /\A--prefix=/
        path = a.split(/=/, 2)[1]
        path = File.expand_path(path) unless path[0,1] == '/'
        @config.install_prefix = path
      else
        setup_rb_error "install: unknown option #{a}"
      end
    end
  end

  def print_usage(out)
    out.puts 'Typical Installation Procedure:'
    out.puts "  $ ruby #{File.basename $0} config"
    out.puts "  $ ruby #{File.basename $0} setup"
    out.puts "  # ruby #{File.basename $0} install (may require root privilege)"
    out.puts
    out.puts 'Detailed Usage:'
    out.puts "  ruby #{File.basename $0} <global option>"
    out.puts "  ruby #{File.basename $0} [<global options>] <task> [<task options>]"

    fmt = "  %-24s %s\n"
    out.puts
    out.puts 'Global options:'
    out.printf fmt, '-q,--quiet',   'suppress message outputs'
    out.printf fmt, '   --verbose', 'output messages verbosely'
    out.printf fmt, '   --help',    'print this message'
    out.printf fmt, '   --version', 'print version and quit'
    out.printf fmt, '   --copyright',  'print copyright and quit'
    out.puts
    out.puts 'Tasks:'
    TASKS.each do |name, desc|
      out.printf fmt, name, desc
    end

    fmt = "  %-24s %s [%s]\n"
    out.puts
    out.puts 'Options for CONFIG or ALL:'
    @config.each do |item|
      out.printf fmt, item.help_opt, item.description, item.help_default
    end
    out.printf fmt, '--rbconfig=path', 'rbconfig.rb to load',"running ruby's"
    out.puts
    out.puts 'Options for INSTALL:'
    out.printf fmt, '--no-harm', 'only display what to do if given', 'off'
    out.printf fmt, '--prefix=path',  'install path prefix', ''
    out.puts
  end

  #
  # Task Handlers
  #

  def exec_config
    @installer.exec_config
    @config.save   # must be final
  end

  def exec_setup
    @installer.exec_setup
  end

  def exec_install
    @installer.exec_install
  end

  def exec_test
    @installer.exec_test
  end

  def exec_show
    @config.each do |i|
      printf "%-20s %s\n", i.name, i.value if i.value?
    end
  end

  def exec_clean
    @installer.exec_clean
  end

  def exec_distclean
    @installer.exec_distclean
  end

end   # class ToplevelInstaller


class ToplevelInstallerMulti < ToplevelInstaller

  include FileOperations

  # Runs the superclass initializer (same arguments), then records the
  # result of directories_of("#{@ardir}/packages") as the package list and
  # builds a root Installer whose source root is @ardir and whose object
  # root is the current directory.
  # Raises RuntimeError ('no package exists') when the package list is empty.
  def initialize(ardir_root, config)
    super
    @packages = directories_of("#{@ardir}/packages")
    raise 'no package exists' if @packages.empty?
    @root_installer = Installer.new(@config, @ardir, File.expand_path('.'))
  end

  # Loads the top-level metaconfig script (passing this installer), then
  # the metaconfig script of every package directory.
  def run_metaconfigs
    @config.load_script("#{@ardir}/metaconfig", self)
    @packages.each {|pkg|
      @config.load_script("#{@ardir}/packages/#{pkg}/metaconfig")
    }
  end

  attr_reader :packages

  # Replaces the package list.
  # Raises RuntimeError when +list+ is empty or when one of its names has
  # no directory under "#{@ardir}/packages" (first offender is reported).
  def packages=(list)
    raise 'package list is empty' if list.empty?
    list.each {|name|
      next if File.dir?("#{@ardir}/packages/#{name}")
      raise "directory packages/#{name} does not exist"
    }
    @packages = list
  end

  # Builds one Installer per package (stored in @installers by package
  # name) and computes @selected: the packages named by the 'with' option
  # (all packages when it is empty) minus those named by 'without'.
  def init_installers
    @installers = {}
    @packages.each {|pkg|
      source_root = "#{@ardir}/packages/#{pkg}"
      @installers[pkg] = Installer.new(@config, source_root, "packages/#{pkg}")
    }
    # extract_selection validates names against @installers, so it must run
    # after the table above is complete.
    wanted   = extract_selection(config('with'))
    unwanted = extract_selection(config('without'))
    @selected = @installers.keys.reject {|name|
      !(wanted.empty? || wanted.include?(name)) || unwanted.include?(name)
    }
  end

  # Splits the comma-separated +list+ into package names and returns them.
  # Reports the first name that is not a key of @installers through
  # setup_rb_error.
  def extract_selection(list)
    list.split(/,/).each {|name|
      next if @installers.key?(name)
      setup_rb_error "no such package: #{name}"
    }
  end

  # Prints the inherited usage text to +f+, followed by the sorted list of
  # package names on one line.
  def print_usage(f)
    super
    f.puts 'Included packages:'
    f.puts '  ' + @packages.sort.join(' ')
    f.puts
  end

  #
  # Task Handlers
  #

  # Handler for 'config': root 'pre-config' hook, the config task of every
  # selected package installer, root 'post-config' hook, then saves @config.
  def exec_config
    run_hook 'pre-config'
    each_selected_installers {|inst| inst.exec_config }
    run_hook 'post-config'
    @config.save   # must be final
  end

  # Handler for 'setup': root 'pre-setup' hook, the setup task of every
  # selected package installer, root 'post-setup' hook.
  def exec_setup
    run_hook 'pre-setup'
    each_selected_installers {|inst| inst.exec_setup }
    run_hook 'post-setup'
  end

  # Handler for 'install': root 'pre-install' hook, the install task of
  # every selected package installer, root 'post-install' hook.
  def exec_install
    run_hook 'pre-install'
    each_selected_installers {|inst| inst.exec_install }
    run_hook 'post-install'
  end

  # Handler for 'test': root 'pre-test' hook, the test task of every
  # selected package installer, root 'post-test' hook.
  def exec_test
    run_hook 'pre-test'
    each_selected_installers {|inst| inst.exec_test }
    run_hook 'post-test'
  end

  # Handler for 'clean': removes the saved configuration file first, then
  # runs the root 'pre-clean' hook, the clean task of every selected
  # package installer and the root 'post-clean' hook.
  def exec_clean
    rm_f @config.savefile
    run_hook 'pre-clean'
    each_selected_installers {|inst| inst.exec_clean }
    run_hook 'post-clean'
  end

  # Handler for 'distclean': removes the saved configuration file first,
  # then runs the root 'pre-distclean' hook, the distclean task of every
  # selected package installer and the root 'post-distclean' hook.
  def exec_distclean
    rm_f @config.savefile
    run_hook 'pre-distclean'
    each_selected_installers {|inst| inst.exec_distclean }
    run_hook 'post-distclean'
  end

  #
  # lib
  #

  # Yields the Installer of every selected package with the working
  # directory set to "packages/<name>" (created on demand, relative to the
  # directory the method was called from).
  #
  # The directory in effect before each package is remembered and restored
  # in an +ensure+ clause, so a failing task or hook no longer leaves the
  # process inside a package directory, and the restore does not depend on
  # the package name being exactly one path component deep.
  def each_selected_installers
    Dir.mkdir 'packages' unless File.dir?('packages')
    @selected.each do |pack|
      $stderr.puts "Processing the package `#{pack}' ..." if verbose?
      Dir.mkdir "packages/#{pack}" unless File.dir?("packages/#{pack}")
      prevdir = Dir.pwd
      Dir.chdir "packages/#{pack}"
      begin
        yield @installers[pack]
      ensure
        # Explicit chdir (not the block form) because the installers
        # themselves call Dir.chdir while traversing.
        Dir.chdir prevdir
      end
    end
  end

  # Runs the hook named +id+ through the root installer.
  def run_hook(id)
    @root_installer.run_hook id
  end

  # module FileOperations requires this
  # Returns the verbose setting of @config.
  def verbose?
    @config.verbose?
  end

  # module FileOperations requires this
  # Returns the no-harm (dry run) setting of @config.
  def no_harm?
    @config.no_harm?
  end

end   # class ToplevelInstallerMulti


class Installer

  FILETYPES = %w( bin lib ext data conf man )

  include FileOperations
  include HookScriptAPI

  # Stores the configuration and the absolute (expanded) source and object
  # root directories; the current relative directory starts at '.'.
  def initialize(config, srcroot, objroot)
    @config = config
    @srcdir = File.expand_path(srcroot)
    @objdir = File.expand_path(objroot)
    @currdir = '.'
  end

  # Short representation: the class name plus the basename of the source root.
  def inspect
    "#<#{self.class} #{File.basename(@srcdir)}>"
  end

  # Does nothing. Aliased below as the per-directory handler for every
  # task/directory-type combination that needs no work.
  def noop(rel)
  end

  #
  # Hook Script API base methods
  #

  # Absolute path of the source root given at construction.
  def srcdir_root
    @srcdir
  end

  # Absolute path of the object root given at construction.
  def objdir_root
    @objdir
  end

  # Relative path of the directory currently being traversed ('.' initially;
  # updated by dive_into).
  def relpath
    @currdir
  end

  #
  # Config Access
  #

  # module FileOperations requires this
  # Returns the verbose setting of @config.
  def verbose?
    @config.verbose?
  end

  # module FileOperations requires this
  # Returns the no-harm (dry run) setting of @config.
  def no_harm?
    @config.no_harm?
  end

  # Runs the given block with @config.verbose forced to false and puts the
  # previous setting back afterwards, even when the block raises.
  # Returns the value of the block.
  def verbose_off
    previous = @config.verbose?
    @config.verbose = false
    yield
  ensure
    @config.verbose = previous
  end

  #
  # TASK config
  #

  # Runs the 'config' task over every directory type (see exec_task_traverse).
  def exec_config
    exec_task_traverse 'config'
  end

  alias config_dir_bin noop
  alias config_dir_lib noop

  # 'config' handler for ext directories: runs extconf when extdir?
  # accepts the current source directory. +rel+ is not used.
  def config_dir_ext(rel)
    extconf if extdir?(curr_srcdir())
  end

  alias config_dir_data noop
  alias config_dir_conf noop
  alias config_dir_man noop

  # Runs the extconf.rb of the current source directory with the Ruby
  # interpreter, passing @config.config_opt as extra arguments.
  def extconf
    ruby "#{curr_srcdir()}/extconf.rb", *@config.config_opt
  end

  #
  # TASK setup
  #

  # Runs the 'setup' task over every directory type (see exec_task_traverse).
  def exec_setup
    exec_task_traverse 'setup'
  end

  # 'setup' handler for bin directories: updates the shebang line of every
  # file found in the current source directory. +rel+ is not used.
  def setup_dir_bin(rel)
    files_of(curr_srcdir()).each do |fname|
      update_shebang_line "#{curr_srcdir()}/#{fname}"
    end
  end

  alias setup_dir_lib noop

  # 'setup' handler for ext directories: runs make when extdir? accepts
  # the current source directory. +rel+ is not used.
  def setup_dir_ext(rel)
    make if extdir?(curr_srcdir())
  end

  alias setup_dir_data noop
  alias setup_dir_conf noop
  alias setup_dir_man noop

  # Rewrites the first line of the script at +path+ according to the
  # 'shebang' option. Nothing happens in no-harm mode or when the option
  # is 'never'.
  #
  # The result is written through open_atomic_writer, which writes to a
  # file named after the basename of +path+ in the current directory.
  def update_shebang_line(path)
    return if no_harm?
    return if config('shebang') == 'never'
    old = Shebang.load(path)
    if old
      # Existing shebang: warn about multiple arguments, then stop when the
      # replacement would be identical.
      $stderr.puts "warning: #{path}: Shebang line includes too many args.  It is not portable and your program may not work." if old.args.size > 1
      new = new_shebang(old)
      return if new.to_s == old.to_s
    else
      # No shebang: only the 'all' setting adds one.
      return unless config('shebang') == 'all'
      new = Shebang.new(config('rubypath'))
    end
    $stderr.puts "updating shebang: #{File.basename(path)}" if verbose?
    open_atomic_writer(path) {|output|
      File.open(path, 'rb') {|f|
        f.gets if old   # discard
        output.puts new.to_s
        output.print f.read
      }
    }
  end

  # Computes the replacement for the shebang +old+:
  # * interpreter basename starting with "ruby": configured ruby path,
  #   same arguments;
  # * "env ruby ...": configured ruby path, arguments after "ruby";
  # * anything else: +old+ itself, unless the 'shebang' option is 'all',
  #   in which case a bare shebang for the configured ruby path.
  def new_shebang(old)
    interpreter = File.basename(old.cmd)
    return Shebang.new(config('rubypath'), old.args) if interpreter =~ /\Aruby/
    if interpreter == 'env' && old.args.first == 'ruby'
      return Shebang.new(config('rubypath'), old.args[1..-1])
    end
    config('shebang') == 'all' ? Shebang.new(config('rubypath')) : old
  end

  # Opens "<basename of path>.tmp" for binary writing, hands it to +block+,
  # then renames it to the basename of +path+. Both names are relative, so
  # the files live in the current working directory rather than in the
  # directory of +path+. The temporary file is removed if it still exists
  # afterwards (e.g. because the block raised).
  def open_atomic_writer(path, &block)
    tmpfile = File.basename(path) + '.tmp'
    begin
      File.open(tmpfile, 'wb', &block)
      File.rename tmpfile, File.basename(path)
    ensure
      File.unlink tmpfile if File.exist?(tmpfile)
    end
  end

  # A parsed "#!" line: the interpreter command and its arguments.
  class Shebang
    attr_reader :cmd
    attr_reader :args

    # Reads the first line of the file at +path+; returns a Shebang when
    # it starts with "#!", nil otherwise (including for an empty file).
    def self.load(path)
      first_line = File.open(path) {|f| f.gets }
      /\A#!/ =~ first_line ? parse(first_line) : nil
    end

    # Builds a Shebang from +line+: surrounding whitespace and a leading
    # "#!" are dropped, the rest is split on whitespace.
    def self.parse(line)
      words = line.strip.sub(/\A\#!/, '').split(' ')
      new(words.first, words[1..-1] || [])
    end

    def initialize(cmd, args = [])
      @cmd = cmd
      @args = args
    end

    # Renders "#! cmd arg1 arg2 ...".
    def to_s
      ["#! #{@cmd}", *@args].join(' ')
    end
  end

  #
  # TASK install
  #

  # Removes the 'InstalledFiles' record in the current directory, then runs
  # the 'install' task over every directory type.
  def exec_install
    rm_f 'InstalledFiles'
    exec_task_traverse 'install'
  end

  # Installs the target files into "<bindir>/<rel>" with mode 0755.
  def install_dir_bin(rel)
    install_files targetfiles(), "#{config('bindir')}/#{rel}", 0755
  end

  # Installs the library files (see libfiles) into "<rbdir>/<rel>" with
  # mode 0644.
  def install_dir_lib(rel)
    install_files libfiles(), "#{config('rbdir')}/#{rel}", 0644
  end

  # Installs the built extension files into "<sodir>/<dirname of rel>" with
  # mode 0555. Does nothing unless extdir? accepts the current source
  # directory.
  def install_dir_ext(rel)
    return unless extdir?(curr_srcdir())
    install_files rubyextentions('.'),
                  "#{config('sodir')}/#{File.dirname(rel)}",
                  0555
  end

  # Installs the target files into "<datadir>/<rel>" with mode 0644.
  def install_dir_data(rel)
    install_files targetfiles(), "#{config('datadir')}/#{rel}", 0644
  end

  # Installs the target files into "<sysconfdir>/<rel>" with mode 0644.
  def install_dir_conf(rel)
    # FIXME: should not remove current config files
    # (rename previous file to .old/.org)
    install_files targetfiles(), "#{config('sysconfdir')}/#{rel}", 0644
  end

  # Installs the target files into "<mandir>/<rel>" with mode 0644.
  def install_dir_man(rel)
    install_files targetfiles(), "#{config('mandir')}/#{rel}", 0644
  end

  # Creates +dest+ and installs every file of +list+ into it with +mode+;
  # both steps are given @config.install_prefix.
  def install_files(list, dest, mode)
    mkdir_p dest, @config.install_prefix
    list.each do |fname|
      install fname, dest, mode, @config.install_prefix
    end
  end

  # Target files minus those matching *.y or *.output.
  def libfiles
    glob_reject(%w(*.y *.output), targetfiles())
  end

  # Returns the target files whose name ends in ".<dllext>" (the extension
  # configured in @config). Reports an error through setup_rb_error when
  # there is none. +dir+ is not used by the body.
  def rubyextentions(dir)
    ents = glob_select("*.#{@config.dllext}", targetfiles())
    if ents.empty?
      setup_rb_error "no ruby extention exists: 'ruby #{$0} setup' first"
    end
    ents
  end

  # Existing files minus hook script names, each mapped to the path it
  # should be read from (see mapdir).
  def targetfiles
    mapdir(existfiles() - hookfiles())
  end

  # Maps every entry name to the path it should be read from: the name
  # itself when it exists relative to the current directory (objdir),
  # otherwise the same name under the current source directory (srcdir).
  def mapdir(ents)
    ents.map {|ent| File.exist?(ent) ? ent : "#{curr_srcdir()}/#{ent}" }
  end

  # picked up many entries from cvs-1.11.1/src/ignore.c
  JUNK_FILES = %w( 
    core RCSLOG tags TAGS .make.state
    .nse_depinfo #* .#* cvslog.* ,* .del-* *.olb
    *~ *.old *.bak *.BAK *.orig *.rej _$* *$

    *.org *.in .*
  )

  # Union of the files of the current source directory and of the current
  # directory, minus everything matching a JUNK_FILES pattern.
  def existfiles
    glob_reject(JUNK_FILES, (files_of(curr_srcdir()) | files_of('.')))
  end

  # Names of all hook scripts that may live next to regular files:
  # "pre-<task>" and "post-<task>", with and without ".rb", for every task
  # that is run through exec_task_traverse. 'distclean' is included because
  # that task is traversed too and therefore runs its hooks in every
  # directory; leaving it out would let those hook scripts be treated as
  # installable files.
  def hookfiles
    %w( pre-%s post-%s pre-%s.rb post-%s.rb ).map {|fmt|
      %w( config setup install clean distclean ).map {|t| sprintf(fmt, t) }
    }.flatten
  end

  # Returns the entries of +ents+ that match the glob pattern +pat+.
  def glob_select(pat, ents)
    re = globs2re([pat])
    ents.select {|ent| re =~ ent }
  end

  # Returns the entries of +ents+ that match none of the glob patterns +pats+.
  def glob_reject(pats, ents)
    re = globs2re(pats)
    ents.reject {|ent| re =~ ent }
  end

  GLOB2REGEX = {
    '.' => '\.',
    '$' => '\$',
    '#' => '\#',
    '*' => '.*'
  }

  # Compiles the glob patterns +pats+ into one Regexp that matches a whole
  # string against any of them. "*" matches any run of characters; every
  # other character is matched literally.
  #
  # All regexp metacharacters are escaped (Regexp.escape), not only
  # ". $ # *", so a pattern such as "*.c++" or "a(b)" matches those literal
  # names instead of being interpreted as regexp syntax.
  def globs2re(pats)
    alternatives = pats.map {|pat|
      # Limit -1 keeps trailing empty fields, so a trailing "*" survives.
      pat.split('*', -1).map {|literal| Regexp.escape(literal) }.join('.*')
    }
    /\A(?:#{alternatives.join('|')})\z/
  end

  #
  # TASK test
  #

  TESTDIR = 'test'

  # Runs the unit tests found in TESTDIR (relative to the current
  # directory) with Test::Unit's AutoRunner. Returns without doing anything
  # when that directory does not exist. Reports an error through
  # setup_rb_error when test/unit cannot be required.
  #
  # The existence check uses TESTDIR as well, so the directory that is
  # checked and the directory that is run can never drift apart.
  def exec_test
    unless File.directory?(TESTDIR)
      $stderr.puts 'no test in this package' if verbose?
      return
    end
    $stderr.puts 'Running tests...' if verbose?
    begin
      require 'test/unit'
    rescue LoadError
      setup_rb_error 'test/unit cannot loaded.  You need Ruby 1.8 or later to invoke this task.'
    end
    runner = Test::Unit::AutoRunner.new(true)
    runner.to_run << TESTDIR
    runner.run
  end

  #
  # TASK clean
  #

  # Runs the 'clean' task over every directory type, then removes the saved
  # configuration file and the 'InstalledFiles' record.
  def exec_clean
    exec_task_traverse 'clean'
    rm_f @config.savefile
    rm_f 'InstalledFiles'
  end

  alias clean_dir_bin noop
  alias clean_dir_lib noop
  alias clean_dir_data noop
  alias clean_dir_conf noop
  alias clean_dir_man noop

  # 'clean' handler for ext directories: runs "make clean" when a Makefile
  # exists in the current directory. +rel+ is not used.
  def clean_dir_ext(rel)
    return unless extdir?(curr_srcdir())
    make 'clean' if File.file?('Makefile')
  end

  #
  # TASK distclean
  #

  # Runs the 'distclean' task over every directory type, then removes the
  # saved configuration file and the 'InstalledFiles' record.
  def exec_distclean
    exec_task_traverse 'distclean'
    rm_f @config.savefile
    rm_f 'InstalledFiles'
  end

  alias distclean_dir_bin noop
  alias distclean_dir_lib noop

  # 'distclean' handler for ext directories: runs "make distclean" when a
  # Makefile exists in the current directory. +rel+ is not used.
  def distclean_dir_ext(rel)
    return unless extdir?(curr_srcdir())
    make 'distclean' if File.file?('Makefile')
  end

  alias distclean_dir_data noop
  alias distclean_dir_conf noop
  alias distclean_dir_man noop

  #
  # Traversing
  #

  # Runs +task+ for every directory type in FILETYPES, bracketed by the
  # "pre-<task>" and "post-<task>" hooks. Each type is traversed with the
  # handler "<task>_dir_<type>"; 'ext' is skipped when the 'without-ext'
  # option is 'yes'.
  def exec_task_traverse(task)
    run_hook("pre-#{task}")
    FILETYPES.each {|type|
      if type == 'ext' && config('without-ext') == 'yes'
        $stderr.puts 'skipping ext/* by user option' if verbose?
      else
        traverse(task, type, "#{task}_dir_#{type}")
      end
    }
    run_hook("post-#{task}")
  end

  # Recursively visits +rel+ and its subdirectories. Inside each directory
  # (entered through dive_into) it runs the "pre-<task>" hook, calls the
  # handler method +mid+, descends into every subdirectory of the current
  # source directory, and finally runs the "post-<task>" hook.
  def traverse(task, rel, mid)
    dive_into(rel) {
      run_hook "pre-#{task}"
      # The handler receives +rel+ without its first path component
      # (e.g. "lib/foo" -> "foo", "lib" -> "").
      __send__ mid, rel.sub(%r[\A.*?(?:/|\z)], '')
      directories_of(curr_srcdir()).each do |d|
        traverse task, "#{rel}/#{d}", mid
      end
      run_hook "post-#{task}"
    }
  end

  # Enters the directory for +rel+ and yields. Does nothing (returns nil)
  # unless "<srcdir>/<rel>" is a directory. Otherwise the basename of +rel+
  # is created in the current directory if needed, becomes the working
  # directory, and @currdir is set to +rel+ for the duration of the block.
  #
  # The previous working directory and @currdir are restored in an +ensure+
  # clause, so an exception raised by the block no longer leaves the
  # installer stranded inside the subdirectory.
  #
  # Returns the parent of +rel+ (the restored @currdir) on success.
  def dive_into(rel)
    return unless File.dir?("#{@srcdir}/#{rel}")

    dir = File.basename(rel)
    Dir.mkdir dir unless File.dir?(dir)
    prevdir = Dir.pwd
    Dir.chdir dir
    begin
      $stderr.puts '---> ' + rel if verbose?
      @currdir = rel
      yield
    ensure
      Dir.chdir prevdir
      @currdir = File.dirname(rel)
    end
    # Only announce leaving the directory when the block completed.
    $stderr.puts '<--- ' + rel if verbose?
    @currdir
  end

  # Runs the hook script named +id+ (or "<id>.rb") from the current source
  # directory, if such a file exists. The script is evaluated in the context
  # of this installer.
  #
  # A StandardError raised by the script is reported through setup_rb_error
  # with the script path and the original message; with $DEBUG set the
  # original exception is re-raised instead.
  def run_hook(id)
    path = [ "#{curr_srcdir()}/#{id}",
             "#{curr_srcdir()}/#{id}.rb" ].detect {|cand| File.file?(cand) }
    return unless path
    begin
      # NOTE(review): evaluates the file's contents as Ruby code; hook
      # scripts are presumably trusted parts of the package.
      instance_eval File.read(path), path, 1
    rescue
      raise if $DEBUG
      setup_rb_error "hook #{path} failed:\n" + $!.message
    end
  end

end   # class Installer


# Raised (through setup_rb_error) for errors that are reported to the user
# as a plain message when the script is run directly.
class SetupError < StandardError; end

# Raises SetupError with the message +msg+.
def setup_rb_error(msg)
  raise SetupError, msg
end

# Script entry point: only runs when this file is executed directly.
# A SetupError is printed as a message plus a usage hint and the process
# exits with status 1; with $DEBUG set the exception is re-raised instead.
if $0 == __FILE__
  begin
    ToplevelInstaller.invoke
  rescue SetupError
    raise if $DEBUG
    $stderr.puts $!.message
    $stderr.puts "Try 'ruby #{$0} --help' for detailed usage."
    exit 1
  end
end


================================================
FILE: test/fixtures/correlation_matrix.rb
================================================
# Retrieve correlation matrix for eight variables
module Statsample
  module Fixtures
    # Returns an 8x8 symmetric matrix with a unit diagonal, extended with
    # Statsample::CovariateMatrix.
    def harman_817
      rows = [
        [ 1.00,  0.84,  0.62, -0.53,  0.03,  0.57, -0.33, -0.63],
        [ 0.84,  1.00,  0.84, -0.68, -0.05,  0.76, -0.35, -0.73],
        [ 0.62,  0.84,  1.00, -0.76,  0.08,  0.81, -0.51, -0.81],
        [-0.53, -0.68, -0.76,  1.00, -0.25, -0.80,  0.62,  0.88],
        [ 0.03, -0.05,  0.08, -0.25,  1.00,  0.25, -0.72, -0.36],
        [ 0.57,  0.76,  0.81, -0.80,  0.25,  1.00, -0.58, -0.84],
        [-0.33, -0.35, -0.51,  0.62, -0.72, -0.58,  1.00,  0.68],
        [-0.63, -0.73, -0.81,  0.88, -0.36, -0.84,  0.68,  1.00]
      ]
      Matrix[*rows].extend(Statsample::CovariateMatrix)
    end
  end
end


================================================
FILE: test/fixtures/hartman_23.matrix
================================================
"height" "arm.span" "forearm" "lower.leg" "weight" "bitro.diameter" "chest.girth" "chest.width"
"height" 1 0.846 0.805 0.859 0.473 0.398 0.301 0.382
"arm.span" 0.846 1 0.881 0.826 0.376 0.326 0.277 0.415
"forearm" 0.805 0.881 1 0.801 0.38 0.319 0.237 0.345
"lower.leg" 0.859 0.826 0.801 1 0.436 0.329 0.327 0.365
"weight" 0.473 0.376 0.38 0.436 1 0.762 0.73 0.629
"bitro.diameter" 0.398 0.326 0.319 0.329 0.762 1 0.583 0.577
"chest.girth" 0.301 0.277 0.237 0.327 0.73 0.583 1 0.539
"chest.width" 0.382 0.415 0.345 0.365 0.629 0.577 0.539 1


================================================
FILE: test/fixtures/repeated_fields.csv
================================================
"id","name","age","city","a1","name","age"
1,"Alex",20,"New York","a,b","a",3
2,"Claude",23,"London","b,c","b",4
3,"Peter",25,"London","a","c",5
4,"Franz",27,"Paris",,"d",6
5,"George","5,5","Tome","a,b,c","f",
6,"Fernand",20,"London","c,b","f",8


================================================
FILE: test/fixtures/stock_data.csv
================================================
17.66
17.65
17.68
17.66
17.68
17.67
17.68
17.68
17.67
17.67
17.68
17.71
17.74
17.72
17.73
17.76
17.74
17.69
17.69
17.67
17.66
17.67
17.69
17.69
17.68
17.65
17.65
17.64
17.63
17.64
17.67
17.68
17.7
17.68
17.69
17.69
17.72
17.71
17.71
17.71
17.69
17.69
17.71
17.72
17.71
17.68
17.68
17.68
17.69
17.68
17.68
17.69
17.67
17.69
17.71
17.7
17.7
17.71
17.73
17.74
17.74
17.74
17.76
17.77
17.55
17.55
17.5
17.46
17.49
17.54
17.51
17.54
17.57
17.54
17.52
17.53
17.56
17.55
17.55
17.54
17.55
17.55
17.55
17.54
17.52
17.53
17.51
17.52
17.5
17.5
17.5
17.49
17.46
17.47
17.48
17.45
17.41
17.39
17.38
17.43
17.44
17.43
17.43
17.46
17.46
17.47
17.47
17.45
17.48
17.49
17.5
17.49
17.48
17.49
17.47
17.47
17.44
17.44
17.43
17.45
17.42
17.43
17.43
17.44
17.44
17.43
17.41
17.41
17.38
17.38
17.37
17.37
17.37
17.3
17.28
17.27
17.19
16.41
16.44
16.48
16.53
16.51
16.57
16.54
16.59
16.64
16.6
16.65
16.69
16.69
16.68
16.64
16.65
16.66
16.64
16.61
16.65
16.67
16.66
16.65
16.61
16.59
16.57
16.55
16.55
16.57
16.54
16.6
16.62
16.6
16.59
16.61
16.66
16.69
16.67
16.65
16.66
16.65
16.65
16.68
16.68
16.67
16.64
16.73
16.76
16.75
16.79
16.8
16.77
16.74
16.76
16.83
16.84
16.82
16.89
16.93
16.94
16.9
16.92
16.88
16.85
16.87
16.8
16.79
16.85
16.85
16.8
16.82
16.85
16.9
16.86
16.79
16.75
16.78
17.06
17.05
17.04
17.02
17.01
17.02
17.05
17.07
17.08
17.09
17.1
17.11
17.09
17.1
17.1
17.12
17.17
17.16
17.17
17.18
17.18
17.18
17.17
17.15
17.14
17.13
17.14
17.13
17.12
17.12
17.09
17.09
17.11
17.06
17.07
17.06
17.07
17.06
17.09
17.05
17.04
17.04
16.99
17
17.03
17
16.97
16.96
16.98
16.98
16.98
17.03
17
17
17
17.02
17
17.02
17.01
17.02
17.03
17.03
17.01
17.03
17.03
17.03
17.01
17.03
17.05
17.05
17.08
17.04
17.01
17.03
17.02
17.03
17.04
17.05
17.37
17.35
17.34
17.32
17.29
17.29
17.22
17.26
17.3
17.34
17.33
17.39
17.4
17.39
17.48
17.5
17.47
17.43
17.4
17.42
17.46
17.48
17.48
17.46
17.46
17.45
17.43
17.44
17.48
17.43
17.45
17.47
17.46
17.46
17.48
17.48
17.48
17.46
17.5
17.55
17.58
17.57
17.56
17.59
17.61
17.62
17.63
17.62
17.61
17.61
17.62
17.64
17.65
17.61
17.62
17.66
17.65
17.64
17.63
17.64
17.64
17.64
17.63
17.61
17.61
17.62
17.63
17.64
17.65
17.66
17.68
17.69
17.69
17.69
17.66
17.69
17.69
17.62
17.68
17.64
17.65
17.61
17.52
17.56
17.55
17.55
17.48
17.45
17.46
17.46
17.44
17.47
17.5
17.49
17.5
17.53
17.53
17.54
17.51
17.51
17.53
17.53
17.53
17.55
17.55
17.54
17.56
17.59
17.57
17.58
17.58
17.57
17.59
17.57
17.55
17.51
17.51
17.52
17.52
17.53
17.55
17.59
17.61
17.61
17.6
17.6
17.62
17.65
17.62
17.6
17.6
17.62
17.61
17.62
17.63
17.64
17.65
17.61
17.62
17.64
17.63
17.62
17.6
17.57
17.57
17.6
17.59
17.6
17.61
17.61
17.63
17.63
17.59
17.58
17.76
17.79
17.76
17.73
17.74
17.73
17.67
17.66
17.66
17.64
17.63
17.62
17.61
17.6
17.61
17.61
17.6
17.6
17.64
17.65
17.65
17.63
17.61
17.6
17.63
17.63
17.62
17.63
17.64
17.62
17.63
17.65
17.64
17.6
17.59
17.59
17.58
17.58
17.6
17.6
17.6
17.6
17.6
17.58
17.59
17.6
17.6
17.6
17.59
17.59
17.58
17.58
17.65
17.65


================================================
FILE: test/fixtures/test_csv.csv
================================================
"id","name","age","city","a1"
1,"Alex",20,"New York","a,b"
2,"Claude",23,"London","b,c"
3,"Peter",25,"London","a"
4,"Franz",27,"Paris",
5,"George","5,5","Tome","a,b,c"
6,"Fernand",,,


================================================
FILE: test/fixtures/tetmat_matrix.txt
================================================
   1.0000000   0.1703164   0.2275128   0.1071861   0.0665047
   0.1703164   1.0000000   0.1890911   0.1111471   0.1724219
   0.2275128   0.1890911   1.0000000   0.1866805   0.1055028
   0.1071861   0.1111471   0.1866805   1.0000000   0.2009241
   0.0665047   0.1724219   0.1055028   0.2009241   1.0000000


================================================
FILE: test/fixtures/tetmat_test.txt
================================================
 1 1 1 1 1
 1 1 1 1 1
 1 1 1 1 1
 1 1 1 1 2
 1 1 1 1 2
 1 1 1 1 2
 1 1 1 1 2
 1 1 1 1 2
 1 1 1 1 2
 1 1 1 2 1
 1 1 1 2 1
 1 1 1 2 2
 1 1 1 2 2
 1 1 1 2 2
 1 1 1 2 2
 1 1 1 2 2
 1 1 1 2 2
 1 1 1 2 2
 1 1 1 2 2
 1 1 1 2 2
 1 1 1 2 2
 1 1 1 2 2
 1 1 2 1 1
 1 1 2 1 2
 1 1 2 2 1
 1 1 2 2 1
 1 1 2 2 1
 1 1 2 2 2
 1 1 2 2 2
 1 1 2 2 2
 1 1 2 2 2
 1 2 1 1 1
 1 2 1 1 2
 1 2 1 1 2
 1 2 1 1 2
 1 2 1 1 2
 1 2 1 1 2
 1 2 1 1 2
 1 2 1 1 2
 1 2 1 1 2
 1 2 1 2 2
 1 2 1 2 2
 1 2 1 2 2
 1 2 1 2 2
 1 2 1 2 2
 1 2 1 2 2
 1 2 1 2 2
 1 2 1 2 2
 1 2 1 2 2
 1 2 1 2 2
 1 2 1 2 2
 1 2 1 2 2
 1 2 1 2 2
 1 2 1 2 2
 1 2 1 2 2
 1 2 1 2 2
 1 2 2 1 2
 1 2 2 1 2
 1 2 2 1 2
 1 2 2 2 1
 1 2 2 2 1
 1 2 2 2 2
 1 2 2 2 2
 1 2 2 2 2
 1 2 2 2 2
 1 2 2 2 2
 1 2 2 2 2
 1 2 2 2 2
 1 2 2 2 2
 1 2 2 2 2
 1 2 2 2 2
 1 2 2 2 2
 1 2 2 2 2
 1 2 2 2 2
 1 2 2 2 2
 1 2 2 2 2
 2 1 1 1 1
 2 1 1 1 1
 2 1 1 1 1
 2 1 1 1 1
 2 1 1 1 1
 2 1 1 1 1
 2 1 1 1 1
 2 1 1 1 1
 2 1 1 1 1
 2 1 1 1 1
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 1 2
 2 1 1 2 1
 2 1 1 2 1
 2 1 1 2 1
 2 1 1 2 1
 2 1 1 2 1
 2 1 1 2 1
 2 1 1 2 1
 2 1 1 2 1
 2 1 1 2 1
 2 1 1 2 1
 2 1 1 2 1
 2 1 1 2 1
 2 1 1 2 1
 2 1 1 2 1
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 1 2 2
 2 1 2 1 1
 2 1 2 1 1
 2 1 2 1 1
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 1 2
 2 1 2 2 1
 2 1 2 2 1
 2 1 2 2 1
 2 1 2 2 1
 2 1 2 2 1
 2 1 2 2 1
 2 1 2 2 1
 2 1 2 2 1
 2 1 2 2 1
 2 1 2 2 1
 2 1 2 2 1
 2 1 2 2 1
 2 1 2 2 1
 2 1 2 2 1
 2 1 2 2 1
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 1 2 2 2
 2 2 1 1 1
 2 2 1 1 1
 2 2 1 1 1
 2 2 1 1 1
 2 2 1 1 1
 2 2 1 1 1
 2 2 1 1 1
 2 2 1 1 1
 2 2 1 1 1
 2 2 1 1 1
 2 2 1 1 1
 2 2 1 1 1
 2 2 1 1 1
 2 2 1 1 1
 2 2 1 1 1
 2 2 1 1 1
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 1 2
 2 2 1 2 1
 2 2 1 2 1
 2 2 1 2 1
 2 2 1 2 1
 2 2 1 2 1
 2 2 1 2 1
 2 2 1 2 1
 2 2 1 2 1
 2 2 1 2 1
 2 2 1 2 1
 2 2 1 2 1
 2 2 1 2 1
 2 2 1 2 1
 2 2 1 2 1
 2 2 1 2 1
 2 2 1 2 1
 2 2 1 2 1
 2 2 1 2 1
 2 2 1 2 1
 2 2 1 2 1
 2 2 1 2 1
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 1 2 2
 2 2 2 1 1
 2 2 2 1 1
 2 2 2 1 1
 2 2 2 1 1
 2 2 2 1 1
 2 2 2 1 1
 2 2 2 1 1
 2 2 2 1 1
 2 2 2 1 1
 2 2 2 1 1
 2 2 2 1 1
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 1 2
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 1
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2
 2 2 2 2 2


================================================
FILE: test/helpers_tests.rb
================================================
$:.unshift(File.expand_path(File.dirname(__FILE__)+'/../lib/'))
$:.unshift(File.expand_path(File.dirname(__FILE__)+'/'))
require 'minitest'
require 'minitest/unit'
require 'mocha/setup'
require 'tempfile'
require 'tmpdir'
require 'shoulda'
require 'shoulda-context'
require 'fixtures/correlation_matrix'

require 'statsample'


module MiniTest
  class Test
    include Shoulda::Context::Assertions
    include Shoulda::Context::InstanceMethods
    extend Shoulda::Context::ClassMethods
      # Declares a +should+ test named +name+ whose body (+block+) is only
      # evaluated when Statsample.has_gsl? is true; otherwise the test is
      # skipped with the message "Requires GSL".
      def self.should_with_gsl(name, &block)
        should(name) do
          next skip("Requires GSL") unless Statsample.has_gsl?
          instance_eval(&block)
        end
      end
  end

  module Assertions
    # Asserts that +obs+ has the same size as +exp+ and that every element
    # of exp.data_with_nils is within +delta+ of the element of +obs+ at
    # the same index.
    #
    # +msg+ (default: a message showing both vectors) is passed to every
    # underlying assertion, so a failure reports it instead of silently
    # dropping it.
    def assert_similar_vector(exp, obs, delta=1e-10,msg=nil)
      msg||="Different vectors #{exp} - #{obs}"
      assert_equal(exp.size, obs.size, msg)
      exp.data_with_nils.each_with_index {|v,i|
        assert_in_delta(v,obs[i],delta,msg)
      }
    end
    # Asserts that +exp+ and +obs+ have the same size and that elements at
    # the same index differ by at most +delta+. +msg+ is appended to the
    # failure messages.
    def assert_equal_vector(exp,obs,delta=1e-10,msg=nil)
      assert_equal(exp.size, obs.size, "Different size.#{msg}")
      exp.size.times do |idx|
        failure = "Different element #{idx}. \nExpected:\n#{exp}\nObserved:\n#{obs}.#{msg}"
        assert_in_delta(exp[idx], obs[idx], delta, failure)
      end
    end
    # Asserts that +exp+ and +obs+ have the same number of rows and columns
    # and that elements at the same position differ by at most +delta+.
    # +msg+ is appended to the failure messages.
    def assert_equal_matrix(exp,obs,delta=1e-10,msg=nil)
      assert_equal(exp.row_size, obs.row_size, "Different row size.#{msg}")
      assert_equal(exp.column_size, obs.column_size, "Different column size.#{msg}")
      exp.row_size.times do |row|
        exp.column_size.times do |col|
          failure = "Different element #{row},#{col}\nExpected:\n#{exp}\nObserved:\n#{obs}.#{msg}"
          assert_in_delta(exp[row, col], obs[row, col], delta, failure)
        end
      end
    end
    alias :assert_raise :assert_raises unless method_defined? :assert_raise
    alias :assert_not_equal :refute_equal unless method_defined? :assert_not_equal
    alias :assert_not_same :refute_same unless method_defined? :assert_not_same
    unless method_defined? :assert_nothing_raised
      # Passes when the given block raises nothing. Any exception (the
      # rescue is deliberately as broad as Exception) turns into a failed
      # assertion whose message is +msg+ formatted with the exception.
      def assert_nothing_raised(msg=nil)
        template = msg || "Nothing should be raised, but raised %s"
        caught = nil
        begin
          yield
        rescue Exception => error
          caught = error
        end
        if caught
          assert(false, sprintf(template, caught))
        else
          assert(true, template)
        end
      end
    end
  end
end

MiniTest.autorun


================================================
FILE: test/test_analysis.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))

# Exercises Statsample::Analysis: storing and running suites, the Suite DSL
# (echo, summary, attach, detach) and the ReportBuilder-backed suite.
class StatsampleAnalysisTestCase < MiniTest::Unit::TestCase
  context(Statsample::Analysis) do
    # Clear the stored analyses before each example.
    setup { Statsample::Analysis.clear_analysis }

    should "store() should create and store Statsample::Analysis::Suite" do
      Statsample::Analysis.store(:first) { a = 1 }
      assert(Statsample::Analysis.stored_analysis[:first])
      assert(Statsample::Analysis.stored_analysis[:first].is_a?(Statsample::Analysis::Suite))
    end

    should "ss_analysis should create an Statsample::Analysis" do
      ss_analysis(:first) { a = 1 }
    end

    should "store last created analysis" do
      suite = Statsample::Analysis.store(:first) { a = 1 }
      assert_equal(suite, Statsample::Analysis.last)
    end

    should "add_to_reportbuilder() add sections to reportbuilder object" do
      builder = mock()
      # One ReportBuilder::Section per stored analysis is expected, in order.
      builder.expects(:add).with { |value| value.is_a?(ReportBuilder::Section) && value.name == :first }
      builder.expects(:add).with { |value| value.is_a?(ReportBuilder::Section) && value.name == :second }

      Statsample::Analysis.store(:first) do
        echo "first", "second"
      end
      Statsample::Analysis.store(:second) do
        echo "third"
      end
      Statsample::Analysis.add_to_reportbuilder(builder, :first, :second)
    end

    should "to_text returns the same as a normal ReportBuilder object" do
      report = ReportBuilder.new(:name => :test)
      section = ReportBuilder::Section.new(:name => "first")
      values = [1, 2, 3].to_scale
      section.add("first")
      section.add(values)
      report.add(section)
      expected = report.to_text
      ss_analysis(:first) do
        echo 'first'
        summary(values)
      end
      observed = Statsample::Analysis.to_text(:first)

      # The first line of each text is left out of the comparison.
      assert_equal(expected.split("\n")[1, expected.size], observed.split("\n")[1, observed.size])
    end

    should "run() execute all analysis by default" do
      probe = mock()
      probe.expects(:run).once
      probe.expects(:hide).once

      Statsample::Analysis.store(:first) { probe.run }
      Statsample::Analysis.store(:second) { probe.hide }

      # No names given: both stored analyses must be executed.
      Statsample::Analysis.run
    end

    should "run() execute blocks specificed on parameters" do
      probe = mock()
      probe.expects(:run).once
      probe.expects(:hide).never
      Statsample::Analysis.store(:first) { probe.run }
      Statsample::Analysis.store(:second) { probe.hide }
      # Only :first is requested, so the :second block must never run.
      Statsample::Analysis.run(:first)
    end

    context(Statsample::Analysis::Suite) do
      should "echo() uses output#puts with same arguments" do
        suite = Statsample::Analysis::Suite.new(:output)
        sink = mock()
        sink.expects(:puts).with(:first, :second).once
        suite.output = sink
        suite.echo(:first, :second)
      end

      should "summary() should call object.summary" do
        suite = Statsample::Analysis::Suite.new(:summary)
        summarizable = stub('summarizable', :summary => 'summary')
        assert_equal(summarizable.summary, suite.summary(summarizable))
      end

      should "attach() allows to call objects on objects which respond to fields" do
        suite = Statsample::Analysis::Suite.new(:summary)
        dataset = { 'x' => stub(:mean => 10), 'y' => stub(:mean => 12) }
        dataset.expects(:fields).returns(%w{x y}).at_least_once
        suite.attach(dataset)
        assert_equal(10, suite.x.mean)
        assert_equal(12, suite.y.mean)
        assert_raise(RuntimeError) { suite.z }
      end

      should "attached objects should be called LIFO" do
        suite = Statsample::Analysis::Suite.new(:summary)
        older = { 'x' => stub(:mean => 100), 'y' => stub(:mean => 120), 'z' => stub(:mean => 13) }
        older.expects(:fields).returns(%w{x y z}).at_least_once
        newer = { 'x' => stub(:mean => 10), 'y' => stub(:mean => 12) }
        newer.expects(:fields).returns(%w{x y}).at_least_once
        suite.attach(older)
        suite.attach(newer)
        # x and y come from the object attached last; z only exists on the first.
        assert_equal(10, suite.x.mean)
        assert_equal(12, suite.y.mean)
        assert_equal(13, suite.z.mean)
      end

      should "detach() without arguments drop latest object" do
        suite = Statsample::Analysis::Suite.new(:summary)
        older = { 'x' => stub(:mean => 100), 'y' => stub(:mean => 120), 'z' => stub(:mean => 13) }
        older.expects(:fields).returns(%w{x y z}).at_least_once
        newer = { 'x' => stub(:mean => 10), 'y' => stub(:mean => 12) }
        newer.expects(:fields).returns(%w{x y}).at_least_once
        suite.attach(older)
        suite.attach(newer)
        assert_equal(10, suite.x.mean)
        suite.detach
        assert_equal(100, suite.x.mean)
      end

      should "detach() with argument drop select object" do
        suite = Statsample::Analysis::Suite.new(:summary)
        only_x = { 'x' => 1 }
        only_x.expects(:fields).returns(%w{x}).at_least_once
        x_and_y = { 'x' => 2, 'y' => 3 }
        x_and_y.expects(:fields).returns(%w{x y}).at_least_once
        only_y = { 'y' => 4 }
        only_y.expects(:fields).returns(%w{y}).at_least_once

        suite.attach(only_y)
        suite.attach(x_and_y)
        suite.attach(only_x)
        assert_equal(1, suite.x)
        assert_equal(3, suite.y)
        suite.detach(x_and_y)
        assert_equal(4, suite.y)
      end

      should "perform a simple analysis" do
        sink = mock()
        sink.expects(:puts).with(5.5)
        suite = Statsample::Analysis.store(:simple, :output => sink) do
          frame = data_frame(:x => vector(1..10), :y => vector(1..10))
          attach(frame)
          echo x.mean
        end
        suite.run
      end
    end

    context(Statsample::Analysis::SuiteReportBuilder) do
      should "echo() use add on rb object" do
        suite = Statsample::Analysis::SuiteReportBuilder.new(:puts_to_add)
        suite.rb.expects(:add).with(:first).twice
        suite.echo(:first, :first)
      end

      should "summary() uses add on rb object" do
        suite = Statsample::Analysis::SuiteReportBuilder.new(:summary_to_add)
        suite.rb.expects(:add).with(:first).once
        suite.summary(:first)
      end
    end

  end
end


================================================
FILE: test/test_anova_contrast.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
# Checks Statsample::Anova::Contrast on four five-case groups, defining the
# contrast by explicit coefficients and by group indexes.
class StatsampleAnovaContrastTestCase < MiniTest::Unit::TestCase
  context(Statsample::Anova::Contrast) do
    setup do
      @vectors = [
        [12, 13, 11, 12, 12], # constant
        [9, 10, 9, 13, 14],   # frequent
        [15, 16, 17, 16, 16], # infrequent
        [17, 18, 12, 18, 20]  # never
      ].map { |scores| scores.to_scale }
      @c = Statsample::Anova::Contrast.new(:vectors => @vectors)
    end

    should "return correct value using c" do
      # Exact rational -1/3; roughly [1, -0.333, -0.333, -0.333].
      minus_third = -1.quo(3)
      @c.c([1, minus_third, minus_third, minus_third])
      assert_in_delta(-2.6667, @c.psi, 0.0001)
      assert_in_delta(1.0165, @c.se, 0.0001)
      assert_in_delta(-2.623, @c.t, 0.001)
      lower, upper = @c.confidence_interval[0], @c.confidence_interval[1]
      assert_in_delta(-4.82, lower, 0.01)
      assert_in_delta(-0.51, upper, 0.01)
      assert(@c.summary.size > 0)
    end

    should "return correct values using c_by_index" do
      @c.c_by_index([0], [1, 2, 3])
      assert_in_delta(-2.6667, @c.psi, 0.0001)
      assert_in_delta(1.0165, @c.se, 0.0001)
      assert_in_delta(-2.623, @c.t, 0.001)
    end

    should "return correct values using incomplete c_by_index" do
      by_coefficients = Statsample::Anova::Contrast.new(:vectors => @vectors, :c => [0.5, 0.5, -1, 0])
      by_indexes = Statsample::Anova::Contrast.new(:vectors => @vectors, :c1 => [0, 1], :c2 => [2])
      assert_equal(by_coefficients.psi, by_indexes.psi)
      assert_equal(by_coefficients.se, by_indexes.se)
      assert_equal(by_coefficients.t, by_indexes.t)
    end
  end
end


================================================
FILE: test/test_anovaoneway.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
# Checks Statsample::Anova::OneWay when it is built directly from sums of
# squares and degrees of freedom.
class StatsampleAnovaOneWayTestCase < MiniTest::Unit::TestCase
  context(Statsample::Anova::OneWay) do
    setup do
      @ss_num = 30.08
      @ss_den = 87.88
      @df_num = 2
      @df_den = 21
      @anova = Statsample::Anova::OneWay.new(
        :ss_num => @ss_num,
        :ss_den => @ss_den,
        :df_num => @df_num,
        :df_den => @df_den
      )
    end

    should "Statsample::Anova.oneway respond to #oneway" do
      assert(Statsample::Anova.respond_to?(:oneway))
    end

    should "return correct value for ms_num and ms_den" do
      assert_in_delta(15.04, @anova.ms_num, 0.01)
      assert_in_delta(4.18, @anova.ms_den, 0.01)
    end

    should "return correct value for f" do
      assert_in_delta(3.59, @anova.f, 0.01)
    end

    should "respond to summary" do
      assert(@anova.respond_to?(:summary))
      assert(@anova.summary.size > 0)
    end
  end
end


================================================
FILE: test/test_anovatwoway.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
# Checks Statsample::Anova::TwoWay when it is built directly from sums of
# squares and degrees of freedom.
class StatsampleAnovaTwoWayTestCase < MiniTest::Unit::TestCase
  context(Statsample::Anova::TwoWay) do
    setup do
      @ss_a = 192.2
      @ss_b = 57.8
      @ss_axb = 168.2
      @ss_within = 75.6
      @df_a = @df_b = 1
      @df_within = 16
      @anova = Statsample::Anova::TwoWay.new(
        :ss_a => @ss_a,
        :ss_b => @ss_b,
        :ss_axb => @ss_axb,
        :ss_within => @ss_within,
        :df_a => @df_a,
        :df_b => @df_b,
        :df_within => @df_within
      )
    end

    should "Statsample::Anova.twoway respond to #twoway" do
      assert(Statsample::Anova.respond_to?(:twoway))
    end

    should "return correct value for ms_a, ms_b and ms_axb" do
      assert_in_delta(192.2, @anova.ms_a, 0.01)
      assert_in_delta(57.8, @anova.ms_b, 0.01)
      assert_in_delta(168.2, @anova.ms_axb, 0.01)
    end

    should "return correct value for f " do
      assert_in_delta(40.68, @anova.f_a, 0.01)
      assert_in_delta(12.23, @anova.f_b, 0.01)
      assert_in_delta(35.60, @anova.f_axb, 0.01)
    end

    should "return correct value for probability for f " do
      # All three effects are expected to be significant at the 0.05 level.
      assert(@anova.f_a_probability < 0.05)
      assert(@anova.f_b_probability < 0.05)
      assert(@anova.f_axb_probability < 0.05)
    end

    should "respond to summary" do
      assert(@anova.respond_to?(:summary))
      assert(@anova.summary.size > 0)
    end
  end
end


================================================
FILE: test/test_anovatwowaywithdataset.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
# Reference:
# * http://www.uwsp.edu/psych/Stat/13/anova-2w.htm#III
# Checks Statsample::Anova::TwoWayWithVectors using a 2x2 design with five
# cases per cell (factors "Diet" and "Age", dependent "Passive Avoidance").
class StatsampleAnovaTwoWayWithVectorsTestCase < MiniTest::Unit::TestCase
  context(Statsample::Anova::TwoWayWithVectors) do
    setup do
      @pa = [5, 4, 3, 4, 2, 18, 19, 14, 12, 15, 6, 7, 5, 8, 4, 6, 9, 5, 9, 3].to_scale
      @pa.name = "Passive Avoidance"
      # Factor a alternates in runs of five: 0 x5, 1 x5, 0 x5, 1 x5.
      @a = (([0] * 5 + [1] * 5) * 2).to_vector
      @a.labels = { 0 => '0%', 1 => '35%' }
      @a.name = 'Diet'
      # Factor b splits the cases in halves: 0 x10, then 1 x10.
      @b = ([0] * 10 + [1] * 10).to_vector
      @b.labels = { 0 => 'Young', 1 => 'Older' }
      @b.name = "Age"
      @anova = Statsample::Anova::TwoWayWithVectors.new(:a => @a, :b => @b, :dependent => @pa)
    end

    should "Statsample::Anova respond to #twoway_with_vectors" do
      assert(Statsample::Anova.respond_to?(:twoway_with_vectors))
    end

    should "#new returns the same as Statsample::Anova.twoway_with_vectors" do
      @anova2 = Statsample::Anova.twoway_with_vectors(:a => @a, :b => @b, :dependent => @pa)
      assert_equal(@anova.summary, @anova2.summary)
    end

    should "return correct value for ms_a, ms_b and ms_axb" do
      assert_in_delta(192.2, @anova.ms_a, 0.01)
      assert_in_delta(57.8, @anova.ms_b, 0.01)
      assert_in_delta(168.2, @anova.ms_axb, 0.01)
    end

    should "return correct value for f " do
      assert_in_delta(40.68, @anova.f_a, 0.01)
      assert_in_delta(12.23, @anova.f_b, 0.01)
      assert_in_delta(35.60, @anova.f_axb, 0.01)
    end

    should "return correct value for probability for f " do
      assert(@anova.f_a_probability < 0.05)
      assert(@anova.f_b_probability < 0.05)
      assert(@anova.f_axb_probability < 0.05)
    end

    should "respond to summary" do
      # Switch on the optional summary sections before rendering it.
      @anova.summary_descriptives = true
      @anova.summary_levene = true
      assert(@anova.respond_to?(:summary))
      assert(@anova.summary.size > 0)
    end
  end
end


================================================
FILE: test/test_anovawithvectors.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
# Checks Statsample::Anova::OneWayWithVectors: accepted constructor forms,
# sums of squares, degrees of freedom, F, Levene values and summary content.
class StatsampleAnovaOneWayWithVectorsTestCase < MiniTest::Unit::TestCase
  context(Statsample::Anova::OneWayWithVectors) do

    context("when initializing") do
      # Three random ten-case vectors, regenerated for every example.
      setup do
        @v1 = 10.times.map { rand(100) }.to_scale
        @v2 = 10.times.map { rand(100) }.to_scale
        @v3 = 10.times.map { rand(100) }.to_scale
      end

      should "be the same using [] or args*" do
        from_args = Statsample::Anova::OneWayWithVectors.new(@v1, @v2, @v3)
        from_array = Statsample::Anova::OneWayWithVectors.new([@v1, @v2, @v3])
        assert_equal(from_args.f, from_array.f)
      end

      should "be the same using module method or object instantiation" do
        from_class = Statsample::Anova::OneWayWithVectors.new(@v1, @v2, @v3)
        from_module = Statsample::Anova.oneway_with_vectors(@v1, @v2, @v3)
        assert_equal(from_class.f, from_module.f)
      end

      should "detect optional hash" do
        named = Statsample::Anova::OneWayWithVectors.new(@v1, @v2, @v3, {:name => 'aaa'})
        assert_equal('aaa', named.name)
      end

      should "omit incorrect arguments" do
        clean = Statsample::Anova::OneWayWithVectors.new(@v1, @v2, @v3, {:name => 'aaa'})
        with_nils = Statsample::Anova::OneWayWithVectors.new(@v1, nil, nil, @v2, @v3, {:name => 'aaa'})
        assert_equal(clean.f, with_nils.f)
      end
    end

    setup do
      @v1 = [3, 3, 2, 3, 6].to_vector(:scale)
      @v2 = [7, 6, 5, 6, 7].to_vector(:scale)
      @v3 = [9, 8, 9, 7, 8].to_vector(:scale)
      @name = "Anova testing"
      @anova = Statsample::Anova::OneWayWithVectors.new(@v1, @v2, @v3, :name => @name)
    end

    should "store correctly contrasts" do
      direct = Statsample::Anova::Contrast.new(:vectors => [@v1, @v2, @v3], :c => [1, -0.5, -0.5])
      via_anova = @anova.contrast(:c => [1, -0.5, -0.5])
      assert_equal(direct.t, via_anova.t)
    end

    should "respond to #summary" do
      assert(@anova.respond_to?(:summary))
    end

    should "have correct name of analysis on #summary" do
      assert_match(/#{@name}/, @anova.summary)
    end

    should "returns same levene values as direct Levene creation" do
      assert_equal(@anova.levene.f, Statsample::Test.levene([@v1, @v2, @v3]).f)
    end

    should "have correct value for levene" do
      assert_in_delta(0.604, @anova.levene.f, 0.001)
      assert_in_delta(0.562, @anova.levene.probability, 0.001)
    end

    should "have correct value for sst" do
      assert_in_delta(72.933, @anova.sst, 0.001)
    end

    should "have correct value for sswg" do
      assert_in_delta(14.8, @anova.sswg, 0.001)
    end

    should "have correct value for ssb" do
      assert_in_delta(58.133, @anova.ssbg, 0.001)
    end

    should "sst=sswg+ssbg" do
      assert_in_delta(@anova.sst, @anova.sswg + @anova.ssbg, 0.00001)
    end

    should "df total equal to number of n-1" do
      total_n = @v1.n + @v2.n + @v3.n
      assert_equal(total_n - 1, @anova.df_total)
    end

    should "df wg equal to number of n-k" do
      total_n = @v1.n + @v2.n + @v3.n
      assert_equal(total_n - 3, @anova.df_wg)
    end

    should "df bg equal to number of k-1" do
      assert_equal(2, @anova.df_bg)
    end

    should "f=(ssbg/df_bg)/(sswt/df_wt)" do
      ms_between = @anova.ssbg.quo(@anova.df_bg)
      ms_within = @anova.sswg.quo(@anova.df_wg)
      assert_in_delta(ms_between.quo(ms_within), @anova.f, 0.001)
    end

    should "p be correct" do
      assert(@anova.probability < 0.01)
    end

    should "be correct using different test values" do
      anova2 = Statsample::Anova::OneWayWithVectors.new([@v1, @v1, @v1, @v1, @v2])
      assert_in_delta(3.960, anova2.f, 0.001)
      assert_in_delta(0.016, anova2.probability, 0.001)
    end

    context "with extra information on summary" do
      setup do
        @anova.summary_descriptives = true
        @anova.summary_levene = true
        @summary = @anova.summary
      end

      should "have section with levene statistics" do
        assert_match(/Levene/, @summary)
      end

      should "have section with descriptives" do
        assert_match(/Min/, @summary)
      end
    end
  end
end


================================================
FILE: test/test_awesome_print_bug.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
# Regression check: printing a scale vector through awesome_print (+ap+)
# must not raise.
class StatsampleAwesomePrintBug < MiniTest::Test
  context("Awesome Print integration") do
    # awesome_print is loaded lazily, only for this context.
    setup { require "awesome_print" }

    should "should be flawless" do
      vector = [1, 2, 3].to_scale

      assert(vector != [1, 2, 3])
      assert_nothing_raised { ap vector }
    end
  end
end


================================================
FILE: test/test_bartlettsphericity.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))

# Checks Statsample::Test::BartlettSphericity on the correlation matrix of
# three fourteen-case variables.
class StatsampleBartlettSphericityTestCase < MiniTest::Test
  include Statsample::Test
  context Statsample::Test::BartlettSphericity do
    setup do
      @v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70].to_scale
      @v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0].to_scale
      @v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4].to_scale
      # KMO: 0.490
      dataset = { 'v1' => @v1, 'v2' => @v2, 'v3' => @v3 }.to_dataset
      correlations = Statsample::Bivariate.correlation_matrix(dataset)
      @bs = Statsample::Test::BartlettSphericity.new(correlations, 14)
    end

    should "have correct value for chi" do
      assert_in_delta(9.477, @bs.value, 0.001)
    end

    should "have correct value for df" do
      assert_equal(3, @bs.df)
    end

    should "have correct value for probability" do
      assert_in_delta(0.024, @bs.probability, 0.001)
    end
  end
end


================================================
FILE: test/test_bivariate.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
# Checks Statsample::Bivariate: sum of squares, covariance, Pearson and
# Spearman correlations, correlation/covariance matrices, point-biserial,
# Kendall's tau and gamma. Examples declared with should_with_gsl compare
# alternative implementations of the same statistic.
class StatsampleBivariateTestCase < MiniTest::Test
  should "method sum of squares should be correct" do
    x = [1, 2, 3, 4, 5, 6].to_vector(:scale)
    y = [6, 2, 4, 10, 12, 8].to_vector(:scale)
    assert_equal(23.0, Statsample::Bivariate.sum_of_squares(x, y))
  end

  should_with_gsl "return same covariance with ruby and gls implementation" do
    x = 20.times.map { rand }.to_scale
    y = 20.times.map { rand }.to_scale
    fast = Statsample::Bivariate.covariance(x, y)
    slow = Statsample::Bivariate.covariance_slow(x, y)
    assert_in_delta(fast, slow, 0.001)
  end

  should_with_gsl "return same correlation with ruby and gls implementation" do
    x = 20.times.map { rand }.to_scale
    y = 20.times.map { rand }.to_scale

    from_gsl = GSL::Stats::correlation(x.gsl, y.gsl)
    from_ruby = Statsample::Bivariate.pearson_slow(x, y)
    assert_in_delta(from_gsl, from_ruby, 1e-10)
  end

  should "return correct pearson correlation" do
    x = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
    y = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
    assert_in_delta(0.525, Statsample::Bivariate.pearson(x, y), 0.001)
    assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(x, y), 0.001)

    # Same pairs as above plus cases where one side is nil.
    x_gaps = [6, 2, 1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil].to_vector(:scale)
    y_gaps = [2, nil, nil, nil, 3, 7, 8, 6, 4, 3, 2, 500].to_vector(:scale)
    assert_in_delta(0.525, Statsample::Bivariate.pearson(x_gaps, y_gaps), 0.001)
    # Test ruby method
    x_valid, y_valid = Statsample.only_valid(x_gaps, y_gaps)
    assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(x_valid, y_valid), 0.001)
  end

  should "return correct values for t_pearson and prop_pearson" do
    x = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
    y = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
    pearson = Statsample::Bivariate::Pearson.new(x, y)
    assert_in_delta(0.525, pearson.r, 0.001)
    assert_in_delta(Statsample::Bivariate.t_pearson(x, y), pearson.t, 0.001)
    assert_in_delta(Statsample::Bivariate.prop_pearson(pearson.t, 8, :both), pearson.probability, 0.001)
    assert(pearson.summary.size > 0)
  end

  should "return correct correlation_matrix with nils values" do
    v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
    v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
    v3 = [6, 2, 1000, 1000, 5, 4, 7, 8].to_vector(:scale)
    v4 = [2, nil, nil, nil, 3, 7, 8, 6].to_vector(:scale)
    dataset = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4 }.to_dataset
    pairwise = Proc.new { |n1, n2| Statsample::Bivariate.pearson(n1, n2) }
    # Expected matrix: pairwise Pearson for every (row, column) pair.
    vectors = [v1, v2, v3, v4]
    rows = vectors.map do |row_vector|
      vectors.map { |col_vector| pairwise.call(row_vector, col_vector) }
    end
    expected = Matrix[*rows]
    obtained = Statsample::Bivariate.correlation_matrix(dataset)
    expected.row_size.times do |i|
      expected.column_size.times do |j|
        assert_in_delta(expected[i, j], obtained[i, j], 0.0001, "#{expected[i,j].class}!=#{obtained[i,j].class}  ")
      end
    end
  end

  should_with_gsl "return same values for optimized and pairwise covariance matrix" do
    cases = 100
    # Five independent random vectors named v1..v5.
    columns = {}
    (1..5).each do |k|
      columns["v#{k}"] = Statsample::Vector.new_scale(cases) { rand }
    end
    dataset = columns.to_dataset

    optimized = Statsample::Bivariate.covariance_matrix_optimized(dataset)
    pairwise = Statsample::Bivariate.covariance_matrix_pairwise(dataset)
    assert_equal_matrix(optimized, pairwise, 1e-15)
  end

  should_with_gsl "return same values for optimized and pairwise correlation matrix" do
    cases = 100
    # Five independent random vectors named v1..v5.
    columns = {}
    (1..5).each do |k|
      columns["v#{k}"] = Statsample::Vector.new_scale(cases) { rand }
    end
    dataset = columns.to_dataset

    optimized = Statsample::Bivariate.correlation_matrix_optimized(dataset)
    pairwise = Statsample::Bivariate.correlation_matrix_pairwise(dataset)
    assert_equal_matrix(optimized, pairwise, 1e-15)
  end

  should "return correct correlation_matrix without nils values" do
    v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
    v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
    v3 = [6, 2, 1000, 1000, 5, 4, 7, 8].to_vector(:scale)
    v4 = [2, 4, 6, 7, 3, 7, 8, 6].to_vector(:scale)
    dataset = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4 }.to_dataset
    pairwise = Proc.new { |n1, n2| Statsample::Bivariate.pearson(n1, n2) }
    # Expected matrix: pairwise Pearson for every (row, column) pair.
    vectors = [v1, v2, v3, v4]
    rows = vectors.map do |row_vector|
      vectors.map { |col_vector| pairwise.call(row_vector, col_vector) }
    end
    expected = Matrix[*rows]
    obtained = Statsample::Bivariate.correlation_matrix(dataset)
    expected.row_size.times do |i|
      expected.column_size.times do |j|
        assert_in_delta(expected[i, j], obtained[i, j], 0.0001, "#{expected[i,j].class}!=#{obtained[i,j].class}  ")
      end
    end
  end

  should "return correct value for prop pearson" do
    t_94 = Statsample::Bivariate.t_r(0.084, 94)
    assert_in_delta(0.42, Statsample::Bivariate.prop_pearson(t_94, 94), 0.01)
    t_95 = Statsample::Bivariate.t_r(0.046, 95)
    assert_in_delta(0.65, Statsample::Bivariate.prop_pearson(t_95, 95), 0.01)

    # Strong positive r: significant for :both and :right, not for :left.
    n = 100
    t_positive = Statsample::Bivariate.t_r(0.9, n)
    assert(Statsample::Bivariate.prop_pearson(t_positive, n, :both) < 0.05)
    assert(Statsample::Bivariate.prop_pearson(t_positive, n, :right) < 0.05)
    assert(Statsample::Bivariate.prop_pearson(t_positive, n, :left) > 0.05)

    # Strong negative r: the one-tailed results swap sides.
    t_negative = Statsample::Bivariate.t_r(-0.9, n)
    assert(Statsample::Bivariate.prop_pearson(t_negative, n, :both) < 0.05)
    assert(Statsample::Bivariate.prop_pearson(t_negative, n, :right) > 0.05)
    assert(Statsample::Bivariate.prop_pearson(t_negative, n, :left) < 0.05)
  end

  should "return correct value for Spearman's rho" do
    x = [86, 97, 99, 100, 101, 103, 106, 110, 112, 113].to_vector(:scale)
    y = [0, 20, 28, 27, 50, 29, 7, 17, 6, 12].to_vector(:scale)
    assert_in_delta(-0.175758, Statsample::Bivariate.spearman(x, y), 0.0001)
  end

  should "return correct value for point_biserial correlation" do
    continuous = [1, 3, 5, 6, 7, 100, 200, 300, 400, 300].to_vector(:scale)
    dichotomous = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0].to_vector(:scale)
    # Passing the continuous vector first is expected to be rejected.
    assert_raises(TypeError) do
      Statsample::Bivariate.point_biserial(continuous, dichotomous)
    end
    assert_in_delta(
      Statsample::Bivariate.point_biserial(dichotomous, continuous),
      Statsample::Bivariate.pearson(dichotomous, continuous),
      0.0001
    )
  end

  should "return correct value for tau_a and tau_b" do
    x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].to_vector(:ordinal)
    y = [1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11].to_vector(:ordinal)
    assert_in_delta(0.6727, Statsample::Bivariate.tau_a(x, y), 0.001)
    assert_in_delta(0.6727, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(x, y).to_matrix), 0.001)

    x_ties = [12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27].to_vector(:ordinal)
    y_ties = [11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0].to_vector(:ordinal)
    assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(x_ties, y_ties).to_matrix), 0.001)
  end

  should "return correct value for gamma correlation" do
    table_2x3 = Matrix[[10, 5, 2], [10, 15, 20]]
    assert_in_delta(0.636, Statsample::Bivariate.gamma(table_2x3), 0.001)
    table_3x4 = Matrix[[15, 12, 6, 5], [12, 8, 10, 8], [4, 6, 9, 10]]
    assert_in_delta(0.349, Statsample::Bivariate.gamma(table_3x4), 0.001)
  end
end


================================================
FILE: test/test_codification.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
# Checks Statsample::Codification: building recoding tables (hash, Excel,
# YAML) from a comma-separated text variable and applying a recoding
# dictionary to vectors and datasets.
class StatsampleCodificationTestCase < MiniTest::Unit::TestCase

  # Builds the shared fixtures (@dict and @ds) before handing the original
  # arguments on to the superclass constructor.
  def initialize(*args)
    activities = %w{run walk,run walking running sleep sleeping,dreaming sleep,dream}.to_vector
    @dict = {
      'run' => 'r', 'walk' => 'w', 'walking' => 'w', 'running' => 'r',
      'sleep' => 's', 'sleeping' => 's', 'dream' => 'd', 'dreaming' => 'd'
    }
    @ds = { "v1" => activities }.to_dataset
    super
  end

  def test_create_hash
    expected_keys = %w{run walk walking running sleep sleeping dream dreaming}.sort
    recoding = Statsample::Codification.create_hash(@ds, ['v1'])
    assert_equal(['v1'], recoding.keys)
    assert_equal(expected_keys, recoding['v1'].keys.sort)
    assert_equal(expected_keys, recoding['v1'].values.sort)
  end

  def test_create_excel
    filename = "#{Dir::tmpdir}/test_excel#{Time.now}.xls"
    Statsample::Codification.create_excel(@ds, ['v1'], filename)
    field = (["v1"] * 8).to_vector
    keys = %w{dream dreaming run running sleep sleeping walk walking}.to_vector
    sheet = Statsample::Excel.read(filename)
    assert_equal(field, sheet['field'])
    assert_equal(keys, sheet['original'])
    assert_equal(keys, sheet['recoded'])
    recoding = Statsample::Codification.excel_to_recoded_hash(filename)
    assert_equal(keys.data, recoding['v1'].keys.sort)
    assert_equal(keys.data, recoding['v1'].values.sort)
  end

  def test_create_yaml
    # An empty list of vectors must be rejected.
    assert_raise(ArgumentError) do
      Statsample::Codification.create_yaml(@ds, [])
    end
    expected_keys = %w{run walk walking running sleep sleeping dream dreaming}.sort

    # First form: the YAML text is returned.
    yaml_text = Statsample::Codification.create_yaml(@ds, ['v1'])
    loaded = YAML::load(yaml_text)
    assert_equal(['v1'], loaded.keys)
    assert_equal(expected_keys, loaded['v1'].keys.sort)

    # Second form: an IO is passed in, then reopened and read back.
    tf = Tempfile.new("test_codification")
    yaml_text = Statsample::Codification.create_yaml(@ds, ['v1'], tf, Statsample::SPLIT_TOKEN)
    tf.close
    tf.open
    loaded = YAML::load(tf)
    assert_equal(['v1'], loaded.keys)
    assert_equal(expected_keys, loaded['v1'].keys.sort)
    tf.close(true)
  end

  def test_recodification
    expected = [['r'], ['w', 'r'], ['w'], ['r'], ['s'], ['s', 'd'], ['s', 'd']]
    assert_equal(expected, Statsample::Codification.recode_vector(@ds['v1'], @dict))
    with_nil = ['run', 'walk,dreaming', nil, 'walk,dream,dreaming,walking'].to_vector
    expected = [['r'], ['w', 'd'], nil, ['w', 'd']]
    assert_equal(expected, Statsample::Codification.recode_vector(with_nil, @dict))
  end

  def test_recode_dataset_simple
    Statsample::Codification.recode_dataset_simple!(@ds, {'v1' => @dict})
    expected_vector = ['r', 'w,r', 'w', 'r', 's', 's,d', 's,d'].to_vector
    # The source vector stays as it was; the result lands in 'v1_recoded'.
    assert_not_equal(expected_vector, @ds['v1'])
    assert_equal(expected_vector, @ds['v1_recoded'])
  end

  def test_recode_dataset_split
    Statsample::Codification.recode_dataset_split!(@ds, {'v1' => @dict})
    # One 0/1 vector per code, stored under "v1_<code>".
    expected_by_code = {
      'r' => [1, 1, 0, 1, 0, 0, 0].to_vector,
      'w' => [0, 1, 1, 0, 0, 0, 0].to_vector,
      's' => [0, 0, 0, 0, 1, 1, 1].to_vector,
      'd' => [0, 0, 0, 0, 0, 1, 1].to_vector
    }
    expected_by_code.each do |code, expected|
      assert_equal(expected, @ds['v1_' + code], "Error on key #{code}")
    end
  end

end


================================================
FILE: test/test_crosstab.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
# Checks Statsample::Crosstab: argument validation, row/column names and
# totals, frequency tables, expected values and summary rendering.
class StatsampleCrosstabTestCase < MiniTest::Unit::TestCase

  # Builds the shared fixtures (two thirteen-case vectors and their crosstab)
  # before handing the original arguments on to the superclass constructor.
  def initialize(*args)
    @v1 = %w{black blonde black black red black brown black blonde black red black blonde}.to_vector
    @v2 = %w{woman man man woman man man man woman man woman woman man man}.to_vector
    @ct = Statsample::Crosstab.new(@v1, @v2)
    super
  end

  def test_crosstab_errors
    # A plain Array in place of a vector is rejected.
    plain_array = %w{black blonde black black red black brown black blonde black}
    assert_raise(ArgumentError) do
      Statsample::Crosstab.new(plain_array, @v2)
    end

    # A vector whose size differs from the other one is rejected too.
    wrong_size = %w{black blonde black black red black brown black blonde black black}.to_vector
    assert_raise(ArgumentError) do
      Statsample::Crosstab.new(wrong_size, @v2)
    end

    assert_nothing_raised do
      Statsample::Crosstab.new(@v1, @v2)
    end
  end

  def test_crosstab_basic
    assert_equal(%w{black blonde brown red}, @ct.rows_names)
    assert_equal(%w{man woman}, @ct.cols_names)
    assert_equal({'black' => 7, 'blonde' => 3, 'red' => 2, 'brown' => 1}, @ct.rows_total)
    assert_equal({'man' => 8, 'woman' => 5}, @ct.cols_total)
  end

  def test_crosstab_frequencies
    cells = @ct.frequencies
    assert_equal(8, cells.size)
    # Element [1] of every yielded entry is added up.
    total = cells.inject(0) { |acc, entry| acc + entry[1] }
    assert_equal(13, total)

    by_row = @ct.frequencies_by_row
    assert_equal(4, by_row.size)
    assert_equal(%w{black blonde brown red}, by_row.keys.sort)

    by_col = @ct.frequencies_by_col
    assert_equal(2, by_col.size)
    assert_equal(%w{man woman}, by_col.keys.sort)

    assert_equal(Matrix.rows([[3, 4], [3, 0], [1, 0], [1, 1]]), @ct.to_matrix)
  end

  def test_summary
    @ct.percentage_row = true
    @ct.percentage_column = true
    @ct.percentage_total = true
    assert(@ct.summary.size > 0)
  end

  def test_expected
    rows = %w{1 1 1 1 1 0 0 0 0 0}.to_vector
    cols = %w{0 0 0 0 0 1 1 1 1 1}.to_vector
    crosstab = Statsample::Crosstab.new(rows, cols)
    assert_equal(Matrix[[2.5, 2.5], [2.5, 2.5]], crosstab.matrix_expected)
  end

  def test_crosstab_with_scale
    rows = %w{1 1 1 1 1 0 0 0 0 0}.to_scale
    cols = %w{0 0 0 0 0 1 1 1 1 1}.to_scale
    crosstab = Statsample::Crosstab.new(rows, cols)
    assert_equal(Matrix[[0, 5], [5, 0]], crosstab.to_matrix)
    assert_nothing_raised { crosstab.summary }
  end

end


================================================
FILE: test/test_csv.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
# Checks Statsample::CSV: reading the fixture file, handling of empty cells
# and repeated column names, and a write/read round trip.
class StatsampleCSVTestCase < MiniTest::Unit::TestCase
  # Loads the CSV fixture that most examples inspect.
  def setup
    @ds=Statsample::CSV.read(File.dirname(__FILE__)+"/fixtures/test_csv.csv")
  end

  # The fixture must yield six cases, the five expected fields, and vectors
  # equal to the hand-built ones below.
  def test_read
    assert_equal(6,@ds.cases)
    assert_equal(%w{id name age city a1}, @ds.fields)
    id=[1,2,3,4,5,6].to_vector(:scale)
    name=["Alex","Claude","Peter","Franz","George","Fernand"].to_vector(:nominal)
    age=[20,23,25,27,5.5,nil].to_vector(:scale)
    city=["New York","London","London","Paris","Tome",nil].to_vector(:nominal)
    a1=["a,b","b,c","a",nil,"a,b,c",nil].to_vector(:nominal)
    ds_exp=Statsample::Dataset.new({'id'=>id,'name'=>name,'age'=>age,'city'=>city,'a1'=>a1}, %w{id name age city a1})
    # Compare field by field first, then the datasets as a whole.
    ds_exp.fields.each{|f|
      assert_equal(ds_exp[f],@ds[f])
    }
    assert_equal(ds_exp,@ds)
  end

  # The last case has no age, so it must be read back as nil.
  def test_nil
    # assert_nil replaces assert_equal(nil, ...), which Minitest 5 deprecates.
    assert_nil(@ds['age'][5])
  end

  # Repeated column names are expected to come back with numeric suffixes.
  def test_repeated
    ds=Statsample::CSV.read(File.dirname(__FILE__)+"/fixtures/repeated_fields.csv")
    assert_equal(%w{id name_1 age_1 city a1 name_2 age_2},ds.fields)
    age=[3,4,5,6,nil,8].to_vector(:scale)
    assert_equal(age,ds['age_2'])
  end

  # Writes the fixture dataset to a temporary file and checks that every row
  # read back equals the original case at the same position.
  def test_write
    tempfile=Tempfile.new("afile")
    begin
      Statsample::CSV.write(@ds, tempfile.path)
      ds2=Statsample::CSV.read(tempfile.path)
      i=0
      ds2.each_array{|row|
        assert_equal(@ds.case_as_array(i),row)
        i+=1
      }
    ensure
      # Close and delete the temporary file even when an assertion fails.
      tempfile.close(true)
    end
  end
end
=begin
class StatsampleCSVTestCase2 < MiniTest::Unit::TestCase
  def setup
    @ds=Statsample::CSV.read19(File.dirname(__FILE__)+"/fixtures/test_csv.csv")
  end
  def test_read
    assert_equal(6,@ds.cases)
    assert_equal(%w{id name age city a1}, @ds.fields)
    id=[1,2,3,4,5,6].to_vector(:scale)
    name=["Alex","Claude","Peter","Franz","George","Fernand"].to_vector(:nominal)
    age=[20,23,25,27,5.5,nil].to_vector(:scale)
    city=["New York","London","London","Paris","Tome",nil].to_vector(:nominal)
    a1=["a,b","b,c","a",nil,"a,b,c",nil].to_vector(:nominal)
    ds_exp=Statsample::Dataset.new({'id'=>id,'name'=>name,'age'=>age,'city'=>city,'a1'=>a1}, %w{id name age city a1})
    ds_exp.fields.each{|f|
      assert_equal(ds_exp[f],@ds[f])
    }
    assert_equal(ds_exp,@ds)
  end
  def test_nil
    assert_equal(nil,@ds['age'][5])
  end
  def test_repeated
    ds=Statsample::CSV.read19(File.dirname(__FILE__)+"/fixtures/repeated_fields.csv")
    assert_equal(%w{id name_1 age_1 city a1 name_2 age_2},ds.fields)
    age=[3,4,5,6,nil,8].to_vector(:scale)
    assert_equal(age,ds['age_2'])
  end
  def test_write
    filename=Tempfile.new("afile")
    #  filename=Dir::tmpdir+"/test_write.csv"
    Statsample::CSV.write(@ds, filename.path)
    ds2=Statsample::CSV.read19(filename.path)
    i=0
    ds2.each_array{|row|
      assert_equal(@ds.case_as_array(i),row)
      i+=1
    }
  end
end
=end


================================================
FILE: test/test_dataset.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
class StatsampleDatasetTestCase < MiniTest::Unit::TestCase
  # Builds the five-case dataset shared by the tests in this class.
  def setup
    vectors = {
      'id'   => Statsample::Vector.new([1, 2, 3, 4, 5]),
      'name' => Statsample::Vector.new(%w{Alex Claude Peter Franz George}),
      'age'  => Statsample::Vector.new([20, 23, 25, 27, 5]),
      'city' => Statsample::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
      'a1'   => Statsample::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c'])
    }
    @ds = Statsample::Dataset.new(vectors, ['id', 'name', 'age', 'city', 'a1'])
  end
  # Dataset#nest groups cases by the values of the given fields; each leaf
  # holds the remaining fields of the matching cases as hashes.
  def test_nest
    source = {
      'a' => %w{a a a b b b}.to_vector,
      'b' => %w{c c d d e e}.to_vector,
      'c' => %w{f g h i j k}.to_vector
    }.to_dataset
    nested = source.nest('a', 'b')
    assert_equal([{ 'c' => 'f' }, { 'c' => 'g' }], nested['a']['c'])
    assert_equal([{ 'c' => 'h' }], nested['a']['d'])
    assert_equal([{ 'c' => 'j' }, { 'c' => 'k' }], nested['b']['e'])
  end
  # The dataset summary must not be empty.
  def test_should_have_summary
    summary = @ds.summary
    assert(summary.size > 0)
  end
  # Case count and field order of the shared dataset.
  def test_basic
    case_count = @ds.cases
    assert_equal(5, case_count)
    field_names = @ds.fields
    assert_equal(%w{id name age city a1}, field_names)
  end
  # Saving a dataset and loading it back must give an equal dataset.
  def test_saveload
    outfile = Tempfile.new("dataset.ds")
    begin
      @ds.save(outfile.path)
      loaded = Statsample.load(outfile.path)
      assert_equal(@ds, loaded)
    ensure
      # Close and unlink the temporary file even when the assertion fails.
      outfile.close!
    end
  end
  # Dataset#to_gsl must produce the equivalent GSL matrix (skipped without GSL).
  def test_gsl
    skip("Gsl needed") unless Statsample.has_gsl?
    expected = GSL::Matrix[[1, 2], [3, 4], [5, 6]]
    ds = Statsample::Dataset.new('v1' => [1, 3, 5].to_vector, 'v2' => [2, 4, 6].to_vector)
    assert_equal(expected, ds.to_gsl)
  end
  # Dataset#to_matrix yields one row per case and one column per vector.
  def test_matrix
    expected = Matrix[[1, 2], [3, 4], [5, 6]]
    ds = Statsample::Dataset.new('v1' => [1, 3, 5].to_vector, 'v2' => [2, 4, 6].to_vector)
    assert_equal(expected, ds.to_matrix)
  end

  # Assigning fields reorders them; when only some fields are listed, the
  # remaining ones are expected after the listed ones.
  def test_fields
    full_order = %w{name a1 id age city}
    @ds.fields = full_order
    assert_equal(%w{name a1 id age city}, @ds.fields)

    @ds.fields = %w{id name age}
    assert_equal(%w{id name age a1 city}, @ds.fields)
  end
  # Dataset#merge concatenates fields in receiver-then-argument order and
  # renames colliding field names with _1 / _2 suffixes.
  def test_merge
    a = [1, 2, 3].to_scale
    b = [3, 4, 5].to_vector
    c = [4, 5, 6].to_scale
    d = [7, 8, 9].to_vector
    e = [10, 20, 30].to_vector
    ds1 = { 'a' => a, 'b' => b }.to_dataset
    ds2 = { 'c' => c, 'd' => d }.to_dataset
    expected = { 'a' => a, 'b' => b, 'c' => c, 'd' => d }.to_dataset

    assert_equal(expected, ds1.merge(ds2))
    expected.fields = %w{c d a b}
    assert_equal(expected, ds2.merge(ds1))

    # Both datasets define 'a': the merged result keeps both under new names.
    ds3 = { 'a' => e }.to_dataset
    expected = { 'a_1' => a, 'b' => b, 'a_2' => e }.to_dataset
    expected.fields = %w{a_1 b a_2}
    assert_equal(expected, ds1.merge(ds3))
  end
  # Dataset#each_vector yields [name, vector] pairs following the current
  # field order.
  def test_each_vector
    a = [1, 2, 3].to_vector
    b = [3, 4, 5].to_vector
    ds = Statsample::Dataset.new({ 'a' => a, 'b' => b }, ["a", "b"])

    seen = []
    ds.each_vector do |name, vector|
      seen << [name, vector]
    end
    assert_equal([["a", a], ["b", b]], seen)

    ds.fields = ["b", "a"]
    seen = []
    ds.each_vector do |name, vector|
      seen << [name, vector]
    end
    assert_equal([["b", b], ["a", a]], seen)
  end
  # Two datasets are equal when data and field order match; changing the
  # field order of one makes them differ.
  def test_equality
    left = Statsample::Dataset.new(
      { 'v1' => [1, 2, 3, 4].to_vector, 'v2' => [5, 6, 7, 8].to_vector }, %w{v2 v1}
    )
    right = Statsample::Dataset.new(
      { 'v1' => [1, 2, 3, 4].to_vector, 'v2' => [5, 6, 7, 8].to_vector }, %w{v2 v1}
    )
    assert_equal(left, right)
    right.fields = %w{v1 v2}
    assert_not_equal(left, right)
  end
  # add_vector appends a new field; a vector whose size differs from the
  # dataset must be rejected with ArgumentError.
  def test_add_vector
    same_size = Statsample::Vector.new(%w{a b c d e})
    @ds.add_vector('new', same_size)
    assert_equal(%w{id name age city a1 new}, @ds.fields)

    too_long = Statsample::Vector.new(%w{a b c d e f g})
    assert_raise(ArgumentError) { @ds.add_vector('new2', too_long) }
  end
  # vector_by_calculation builds a vector from the block's value for each row.
  def test_vector_by_calculation
    a1 = [1, 2, 3, 4, 5, 6, 7].to_vector(:scale)
    a2 = [10, 20, 30, 40, 50, 60, 70].to_vector(:scale)
    a3 = [100, 200, 300, 400, 500, 600, 700].to_vector(:scale)
    ds = { 'a1' => a1, 'a2' => a2, 'a3' => a3 }.to_dataset
    total = ds.vector_by_calculation do |row|
      row['a1'] + row['a2'] + row['a3']
    end
    expected = [111, 222, 333, 444, 555, 666, 777].to_vector(:scale)
    assert_equal(expected, total)
  end
  # vector_sum adds the selected fields row by row; a row with any missing
  # value among those fields yields nil.
  def test_vector_sum
    a1 = [1, 2, 3, 4, 5, nil].to_vector(:scale)
    a2 = [10, 10, 20, 20, 20, 30].to_vector(:scale)
    b1 = [nil, 1, 1, 1, 1, 2].to_vector(:scale)
    b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
    ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2 }.to_dataset

    sum_all = ds.vector_sum
    sum_a = ds.vector_sum(['a1', 'a2'])
    sum_b = ds.vector_sum(['b1', 'b2'])

    assert_equal([11, 12, 23, 24, 25, nil].to_vector(:scale), sum_a)
    assert_equal([nil, 3, 3, nil, 3, 5].to_vector(:scale), sum_b)
    assert_equal([nil, 15, 26, nil, 28, nil].to_vector(:scale), sum_all)
  end
  # vector_missing_values counts, for every case, how many fields are missing.
  def test_vector_missing_values
    columns = {
      'a1' => [1, nil, 3, 4, 5, nil].to_vector(:scale),
      'a2' => [10, nil, 20, 20, 20, 30].to_vector(:scale),
      'b1' => [nil, nil, 1, 1, 1, 2].to_vector(:scale),
      'b2' => [2, 2, 2, nil, 2, 3].to_vector(:scale),
      'c'  => [nil, 2, 4, 2, 2, 2].to_vector(:scale)
    }
    ds = columns.to_dataset
    expected_counts = [2, 3, 0, 1, 0, 1].to_vector(:scale)
    assert_equal(expected_counts, ds.vector_missing_values)
  end
  
  # has_missing_data? is true for a dataset with nils and false for the
  # result of dup_only_valid.
  def test_has_missing_values
    columns = {
      'a1' => [1, nil, 3, 4, 5, nil].to_vector(:scale),
      'a2' => [10, nil, 20, 20, 20, 30].to_vector(:scale),
      'b1' => [nil, nil, 1, 1, 1, 2].to_vector(:scale),
      'b2' => [2, 2, 2, nil, 2, 3].to_vector(:scale),
      'c'  => [nil, 2, 4, 2, 2, 2].to_vector(:scale)
    }
    ds = columns.to_dataset
    assert(ds.has_missing_data?)
    only_valid = ds.dup_only_valid
    assert(!only_valid.has_missing_data?)
  end
  
  
  # vector_count_characters returns one character total per case, taken
  # across all fields.
  def test_vector_count_characters
    columns = {
      'a1' => [1, "abcde", 3, 4, 5, nil].to_vector(:scale),
      'a2' => [10, 20.3, 20, 20, 20, 30].to_vector(:scale),
      'b1' => [nil, "343434", 1, 1, 1, 2].to_vector(:scale),
      'b2' => [2, 2, 2, nil, 2, 3].to_vector(:scale),
      'c'  => [nil, 2, "This is a nice example", 2, 2, 2].to_vector(:scale)
    }
    ds = columns.to_dataset
    expected_lengths = [4, 17, 27, 5, 6, 5].to_vector(:scale)
    assert_equal(expected_lengths, ds.vector_count_characters)
  end
  # vector_mean averages fields row by row. Called without arguments, any
  # missing value gives nil; the second argument used below (1) lets rows with
  # up to that many missing values still produce a mean.
  def test_vector_mean
    a1 = [1, 2, 3, 4, 5, nil].to_vector(:scale)
    a2 = [10, 10, 20, 20, 20, 30].to_vector(:scale)
    b1 = [nil, 1, 1, 1, 1, 2].to_vector(:scale)
    b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
    c  = [nil, 2, 4, 2, 2, 2].to_vector(:scale)
    ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset

    mean_all = ds.vector_mean
    mean_a = ds.vector_mean(['a1', 'a2'], 1)
    mean_b = ds.vector_mean(['b1', 'b2'], 1)
    mean_c = ds.vector_mean(['b1', 'b2', 'c'], 1)

    expected_a = [5.5, 6, 11.5, 12, 12.5, 30].to_vector(:scale)
    expected_b = [2, 1.5, 1.5, 1, 1.5, 2.5].to_vector(:scale)
    expected_c = [nil, 5.0 / 3, 7.0 / 3, 1.5, 5.0 / 3, 7.0 / 3].to_vector(:scale)
    expected_all = [nil, 3.4, 6, nil, 6.0, nil].to_vector(:scale)

    assert_equal(expected_a, mean_a)
    assert_equal(expected_b, mean_b)
    assert_equal(expected_c, mean_c)
    assert_equal(expected_all, mean_all)
  end

  # each_array yields every case as an array of values in field order.
  def test_each_array
    expected = [
      [1, 'Alex', 20, 'New York', 'a,b'],
      [2, 'Claude', 23, 'London', 'b,c'],
      [3, 'Peter', 25, 'London', 'a'],
      [4, 'Franz', 27, 'Paris', nil],
      [5, 'George', 5, 'Tome', 'a,b,c']
    ]
    rows = []
    @ds.each_array do |row|
      rows << row
    end
    assert_equal(expected, rows)
  end
  # recode! replaces a field's values with the block result for each case.
  def test_recode
    @ds['age'].type = :scale
    @ds.recode!("age") do |c|
      c['id'] * 2
    end
    doubled_ids = [2, 4, 6, 8, 10].to_vector(:scale)
    assert_equal(doubled_ids, @ds['age'])
  end
  # case_as_hash / case_as_array and their underscore-prefixed ("native")
  # counterparts must return the same case data.
  def test_case_as
    first_case = { 'id' => 1, 'name' => 'Alex', 'city' => 'New York', 'age' => 20, 'a1' => 'a,b' }
    last_case = [5, 'George', 5, 'Tome', 'a,b,c']

    assert_equal(first_case, @ds.case_as_hash(0))
    assert_equal(last_case, @ds.case_as_array(4))
    # Native methods
    assert_equal(first_case, @ds._case_as_hash(0))
    assert_equal(last_case, @ds._case_as_array(4))
  end
  # delete_vector removes the field from both the field list and the
  # vectors hash.
  def test_delete_vector
    @ds.delete_vector('name')
    remaining_fields = @ds.fields
    assert_equal(%w{id age city a1}, remaining_fields)
    remaining_keys = @ds.vectors.keys.sort
    assert_equal(%w{a1 age city id}, remaining_keys)
  end
  # A vector's type can be changed through Dataset#col.
  def test_change_type
    @ds.col('age').type = :scale
    age_type = @ds.col('age').type
    assert_equal(:scale, age_type)
  end
  # add_vectors_by_split_recode adds one 0/1 indicator vector per distinct
  # token of 'a1', named with a numeric suffix and labelled "a1:<token>".
  def test_split_by_separator_recode
    @ds.add_vectors_by_split_recode("a1", "_")
    assert_equal(%w{id name age city a1 a1_1 a1_2 a1_3}, @ds.fields)
    assert_equal([1, 0, 1, nil, 1], @ds.col('a1_1').to_a)
    assert_equal([1, 1, 0, nil, 1], @ds.col('a1_2').to_a)
    assert_equal([0, 1, 0, nil, 1], @ds.col('a1_3').to_a)
    expected_names = { 'a1_1' => 'a1:a', 'a1_2' => 'a1:b', 'a1_3' => 'a1:c' }
    expected_names.each do |field, label|
      assert_equal(label, @ds[field].name)
    end
  end
  # add_vectors_by_split adds one 0/1 indicator vector per distinct token of
  # 'a1', named "a1_<token>".
  def test_split_by_separator
    @ds.add_vectors_by_split("a1", "_")
    assert_equal(%w{id name age city a1 a1_a a1_b a1_c}, @ds.fields)
    indicator_a = @ds.col('a1_a').to_a
    indicator_b = @ds.col('a1_b').to_a
    indicator_c = @ds.col('a1_c').to_a
    assert_equal([1, 0, 1, nil, 1], indicator_a)
    assert_equal([1, 1, 0, nil, 1], indicator_b)
    assert_equal([0, 1, 0, nil, 1], indicator_c)
  end
  # Median and 25th percentile of scale vectors with 100, 99 and 50 values.
  def test_percentiles
    hundred = (1..100).to_a.to_scale
    assert_equal(50.5, hundred.median)
    assert_equal(25.5, hundred.percentil(25))

    ninety_nine = (1..99).to_a.to_scale
    assert_equal(50, ninety_nine.median)
    assert_equal(25, ninety_nine.percentil(25))

    fifty = (1..50).to_a.to_scale
    assert_equal(25.5, fifty.median)
    assert_equal(13, fifty.percentil(25))
  end
  # add_case accepts an array, a hash, or an array of arrays (one case per
  # inner array); add_case_array appends a single array case.
  def test_add_case
    empty_columns = { 'a' => [].to_vector, 'b' => [].to_vector, 'c' => [].to_vector }
    ds = Statsample::Dataset.new(empty_columns)
    ds.add_case([1, 2, 3])
    ds.add_case({ 'a' => 4, 'b' => 5, 'c' => 6 })
    ds.add_case([[7, 8, 9], %w{a b c}])
    assert_equal({ 'a' => 1, 'b' => 2, 'c' => 3 }, ds.case_as_hash(0))
    assert_equal([4, 5, 6], ds.case_as_array(1))
    assert_equal([7, 8, 9], ds.case_as_array(2))
    assert_equal(['a', 'b', 'c'], ds.case_as_array(3))

    ds.add_case_array([6, 7, 1])
    ds.update_valid_data
    assert_equal([6, 7, 1], ds.case_as_array(4))
  end
  # A dataset must survive a Marshal dump/load round trip unchanged.
  def test_marshaling
    dumped = Marshal.dump(@ds)
    restored = Marshal.load(dumped)
    assert_equal(restored, @ds)
  end
  # Indexing a dataset with a range of field names returns a dataset holding
  # those fields (in field order) and the very same vector objects.
  def test_range
    v1 = [1, 2, 3, 4].to_vector
    v2 = [5, 6, 7, 8].to_vector
    v3 = [9, 10, 11, 12].to_vector
    full = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2, 'v3' => v3 }, %w{v3 v2 v1})
    assert_same(v1, full['v1'])

    slice = full["v2".."v1"]
    assert_equal(%w{v2 v1}, slice.fields)
    assert_same(full['v1'], slice['v1'])
    assert_same(full['v2'], slice['v2'])
  end
  # Dataset#clone returns a distinct dataset that shares the vector objects
  # but owns its own fields array; a field name restricts the clone.
  def test_clone
    v1 = [1, 2, 3, 4].to_vector
    v2 = [5, 6, 7, 8].to_vector
    original = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w{v2 v1})

    cloned = original.clone
    assert_equal(original, cloned)
    assert_not_same(original, cloned)
    assert_equal(original['v1'], cloned['v1'])
    assert_same(original['v1'], cloned['v1'])
    assert_equal(original.fields, cloned.fields)
    assert_not_same(original.fields, cloned.fields)
    assert_equal(original.cases, cloned.cases)

    # partial clone
    partial = original.clone('v1')
    expected = Statsample::Dataset.new({ 'v1' => v1 }, %w{v1})
    assert_equal(expected, partial)
    assert_not_same(expected, partial)
    assert_equal(partial['v1'], expected['v1'])
    assert_same(partial['v1'], expected['v1'])
    assert_equal(partial.fields, expected.fields)
    assert_equal(partial.cases, expected.cases)

    assert_not_same(partial.fields, expected.fields)
  end
  # Dataset#dup returns a distinct dataset with copied vectors and its own
  # fields array; a field name restricts the copy; dup_empty keeps the
  # structure (fields, vector types) without any data.
  def test_dup
    v1 = [1, 2, 3, 4].to_vector
    v2 = [5, 6, 7, 8].to_vector
    ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w{v2 v1})
    ds2 = ds1.dup
    assert_equal(ds1, ds2)
    assert_not_same(ds1, ds2)
    assert_equal(ds1['v1'], ds2['v1'])
    assert_not_same(ds1['v1'], ds2['v1'])
    assert_equal(ds1.cases, ds2.cases)

    assert_equal(ds1.fields, ds2.fields)
    assert_not_same(ds1.fields, ds2.fields)
    ds1['v1'].type = :scale

    # dup partial
    ds3 = ds1.dup('v1')
    ds_exp = Statsample::Dataset.new({ 'v1' => v1 }, %w{v1})
    assert_equal(ds_exp, ds3)
    assert_not_same(ds_exp, ds3)
    assert_equal(ds3['v1'], ds_exp['v1'])
    assert_not_same(ds3['v1'], ds_exp['v1'])
    assert_equal(ds3.fields, ds_exp.fields)
    assert_equal(ds3.cases, ds_exp.cases)

    assert_not_same(ds3.fields, ds_exp.fields)

    # empty
    ds_empty = ds1.dup_empty
    assert_not_equal(ds1, ds_empty)
    assert_not_equal(ds1['v1'], ds_empty['v1'])
    assert_equal([], ds_empty['v1'].data)
    assert_equal([], ds_empty['v2'].data)
    assert_equal(:scale, ds_empty['v1'].type)
    # Check the empty copy itself: the field assertions previously re-tested
    # ds2 here, leaving the fields of the dup_empty result unverified.
    assert_equal(ds1.fields, ds_empty.fields)
    assert_not_same(ds1.fields, ds_empty.fields)
  end
  # from_to returns the field names between two fields (inclusive) and
  # raises ArgumentError when a boundary field does not exist.
  def test_from_to
    between = @ds.from_to("name", "city")
    assert_equal(%w{name age city}, between)
    assert_raise(ArgumentError) { @ds.from_to("name", "a2") }
  end
  # each_array_with_nils yields rows in which values declared as missing
  # are replaced by nil.
  def test_each_array_with_nils
    v1 = [1, -99, 3, 4, "na"].to_vector(:scale, :missing_values => [-99, "na"])
    v2 = [5, 6, -99, 8, 20].to_vector(:scale, :missing_values => [-99])
    v3 = [9, 10, 11, 12, 20].to_vector(:scale, :missing_values => [-99])
    source = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2, 'v3' => v3 })
    copy = source.dup_empty
    source.each_array_with_nils do |row|
      copy.add_case_array(row)
    end
    copy.update_valid_data
    assert_equal([1, nil, 3, 4, nil], copy['v1'].data)
    assert_equal([5, 6, nil, 8, 20], copy['v2'].data)
  end
  # dup_only_valid keeps only the cases with no missing value, either over
  # every field or over the given subset; Statsample.only_valid does the
  # same for plain vectors.
  def test_dup_only_valid
    v1 = [1, nil, 3, 4].to_vector(:scale)
    v2 = [5, 6, nil, 8].to_vector(:scale)
    v3 = [9, 10, 11, 12].to_vector(:scale)
    ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2, 'v3' => v3 })

    valid = ds1.dup_only_valid
    expected = Statsample::Dataset.new(
      {
        'v1' => [1, 4].to_vector(:scale),
        'v2' => [5, 8].to_vector(:scale),
        'v3' => [9, 12].to_vector(:scale)
      }
    )
    assert_equal(expected, valid)
    assert_equal(expected.vectors.values, Statsample::only_valid(v1, v2, v3))

    expected_partial = Statsample::Dataset.new(
      {
        'v1' => [1, 3, 4].to_vector(:scale),
        'v3' => [9, 11, 12].to_vector(:scale)
      }
    )
    assert_equal(expected_partial, ds1.dup_only_valid(%w{v1 v3}))
  end
  # filter returns a dataset holding only the cases for which the block is
  # true.
  def test_filter
    @ds['age'].type = :scale
    filtered = @ds.filter { |c| c['id'] == 2 || c['id'] == 4 }
    expected_vectors = {
      'id'   => Statsample::Vector.new([2, 4]),
      'name' => Statsample::Vector.new(%w{Claude Franz}),
      'age'  => Statsample::Vector.new([23, 27], :scale),
      'city' => Statsample::Vector.new(['London', 'Paris']),
      'a1'   => Statsample::Vector.new(['b,c', nil])
    }
    expected = Statsample::Dataset.new(expected_vectors, ['id', 'name', 'age', 'city', 'a1'])
    assert_equal(expected, filtered)
  end
  # filter_field returns one field's values for the cases accepted by the
  # block.
  def test_filter_field
    @ds['age'].type = :scale
    selected_ids = @ds.filter_field('id') { |c| c['id'] == 2 || c['id'] == 4 }
    assert_equal([2, 4].to_vector, selected_ids)
  end
  # verify reports every case failing one of the given tests; when the first
  # argument is a field name, that field's value identifies the case in the
  # report.
  def test_verify
    name = %w{r1 r2 r3 r4}.to_vector(:nominal)
    v1 = [1, 2, 3, 4].to_vector(:scale)
    v2 = [4, 3, 2, 1].to_vector(:scale)
    v3 = [10, 20, 30, 40].to_vector(:scale)
    v4 = %w{a b a b}.to_vector(:nominal)
    ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'id' => name }.to_dataset
    ds.fields = %w{v1 v2 v3 v4 id}

    # Tests every case satisfies
    odd_when_a = create_test("If v4=a, v1 odd") do |r|
      r['v4'] == 'b' || (r['v4'] == 'a' && r['v1'] % 2 == 1)
    end
    ten_times = create_test("v3=v1*10") { |r| r['v3'] == r['v1'] * 10 }
    # Test that fails for the cases where v4 is 'a'
    always_b = create_test("v4='b'") { |r| r['v4'] == 'b' }

    by_position = ["1 [1]: v4='b'", "3 [3]: v4='b'"]
    by_id_field = ["1 [r1]: v4='b'", "3 [r3]: v4='b'"]

    report = ds.verify(always_b, odd_when_a, ten_times)
    assert_equal(by_position, report)
    report = ds.verify('id', odd_when_a, ten_times, always_b)
    assert_equal(by_id_field, report)
  end
  # compute evaluates an arithmetic expression over field names, case by case.
  def test_compute_operation
    v1 = [1, 2, 3, 4].to_vector(:scale)
    v2 = [4, 3, 2, 1].to_vector(:scale)
    v3 = [10, 20, 30, 40].to_vector(:scale)
    ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3 }.to_dataset

    halved = [1.quo(2), 1, 3.quo(2), 2].to_vector(:scale)
    summed = [1 + 4 + 10.0, 2 + 3 + 20.0, 3 + 2 + 30.0, 4 + 1 + 40.0].to_vector(:scale)
    multiplied = [1 * 4, 2 * 3, 3 * 2, 4 * 1].to_vector(:scale)

    assert_equal(halved, ds.compute("v1/2"))
    assert_equal(summed, ds.compute("v1+v2+v3"))
    assert_equal(multiplied, ds.compute("v1*v2"))
  end
  # crosstab_by_asignation pivots (row id, column id, value) triples into a
  # dataset with an '_id' field plus one scale field per column id.
  def test_crosstab_with_asignation
    row_ids = %w{a a a b b b c c c}.to_vector
    col_ids = %w{a b c a b c a b c}.to_vector
    values  = %w{0 1 0 0 1 1 0 0 1}.to_scale
    ds = Statsample::Dataset.crosstab_by_asignation(row_ids, col_ids, values)
    assert_equal(:nominal, ds['_id'].type)
    assert_equal(:scale, ds['a'].type)
    assert_equal(:scale, ds['b'].type)

    expected = {
      '_id' => %w{a b c}.to_vector,
      'a'   => %w{0 0 0}.to_scale,
      'b'   => %w{1 1 0}.to_scale,
      'c'   => %w{0 1 1}.to_scale
    }.to_dataset
    assert_equal(ds, expected)
  end
  # one_to_many unrolls numbered column groups (car_color1, car_value1, ...)
  # into one case per non-empty group, tagged with the group number in
  # '_col_id'.
  def test_one_to_many
    wide_fields = %w{id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3}
    ds = Statsample::Dataset.new(wide_fields)
    [
      ['1', 'george', 'red', 10, 'blue', 20, nil, nil],
      ['2', 'fred', 'green', 15, 'orange', 30, 'white', 20],
      ['3', 'alfred', nil, nil, nil, nil, nil, nil]
    ].each do |wide_case|
      ds.add_case_array wide_case
    end
    ds.update_valid_data

    ids = %w{1 1 2 2 2}.to_vector
    colors = %w{red blue green orange white}.to_vector
    values = [10, 20, 15, 30, 20].to_vector
    col_ids = [1, 2, 1, 2, 3].to_scale
    long_vectors = { 'id' => ids, '_col_id' => col_ids, 'color' => colors, 'value' => values }
    ds_expected = long_vectors.to_dataset(['id', '_col_id', 'color', 'value'])
    assert_equal(ds_expected, ds.one_to_many(%w{id}, "car_%v%n"))
  end

end


================================================
FILE: test/test_dominance_analysis.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
# Checks Statsample::DominanceAnalysis against fixed correlation matrices,
# with one and with two dependent variables.
class StatsampleDominanceAnalysisTestCase < MiniTest::Unit::TestCase
  # Single dependent variable 'y' (example from Budescu, 1993).
  def test_dominance_univariate
    m = Matrix[
      [1, 0.683, 0.154, 0.460, 0.618],
      [0.683, 1, -0.050, 0.297, 0.461],
      [0.154, -0.050, 1, 0.006, 0.262],
      [0.460, 0.297, 0.006, 1, 0.507],
      [0.618, 0.461, 0.262, 0.507, 1]
    ]
    m.extend Statsample::CovariateMatrix
    m.fields = %w{x1 x2 x3 x4 y}
    da = Statsample::DominanceAnalysis.new(m, 'y')

    # Contribution of each predictor when added to the model holding only x1.
    { 'x2' => 0.003, 'x3' => 0.028, 'x4' => 0.063 }.each do |predictor, expected|
      assert_in_delta(expected, da.models_data[['x1']].contributions[predictor], 0.001)
    end
    assert_in_delta(0.052, da.models_data[['x2', 'x3', 'x4']].contributions['x1'], 0.001)

    expected_dominances = [1, 1, 0.5, 0.5, 0, 0]
    expected_g_dominances = [1, 1, 1, 1, 0, 0]
    da.pairs.each_with_index do |pair, i|
      first = pair[0]
      second = pair[1]
      assert_equal(expected_dominances[i], da.total_dominance_pairwise(first, second))
      assert_equal(expected_dominances[i], da.conditional_dominance_pairwise(first, second))
      assert_equal(expected_g_dominances[i], da.general_dominance_pairwise(first, second))
    end
    assert(da.summary.size > 0)
  end

  # Two dependent variables (y1, y2) with the :p2yx association method.
  def test_dominance_multivariate
    m = Matrix[
      [1.0, -0.19, -0.358, -0.343, 0.359, 0.257],
      [-0.19, 1.0, 0.26, 0.29, -0.11, -0.11],
      [-0.358, 0.26, 1.0, 0.54, -0.49, -0.23],
      [-0.343, 0.29, 0.54, 1.0, -0.22, -0.41],
      [0.359, -0.11, -0.49, -0.22, 1.0, 0.62],
      [0.257, -0.11, -0.23, -0.41, 0.62, 1]
    ]
    m.extend Statsample::CovariateMatrix
    m.fields = %w{y1 y2 x1 x2 x3 x4}
    # NOTE(review): the submatrix is never used below; the call is kept so the
    # test still exercises it exactly as before.
    m2 = m.submatrix(%w{y1 x1 x2 x3 x4})

    da = Statsample::DominanceAnalysis.new(
      m, ['y1', 'y2'], :cases => 683, :method_association => :p2yx
    )

    { 'x2' => 0.027, 'x3' => 0.024, 'x4' => 0.017 }.each do |predictor, expected|
      assert_in_delta(expected, da.models_data[['x1']].contributions[predictor], 0.003)
    end
  end
end


================================================
FILE: test/test_factor.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
#require 'rserve'
#require 'statsample/rserve_extension'

class StatsampleFactorTestCase < MiniTest::Unit::TestCase
  include Statsample::Fixtures
  # Based on Hardle and Simar
  # Stores the absolute path of the fixtures directory next to this file.
  def setup
    here = File.dirname(__FILE__)
    @fixtures_dir = File.expand_path(here + "/fixtures")
  end
  # Based on a Hardle and Simar example (bank2.dat fixture).
  # PCA on the covariance matrix of the centered data: checks the eigenvalues
  # and that each component-matrix entry matches the correlation between the
  # variable and the corresponding principal component.
  def test_covariance_matrix
    ds = Statsample::PlainText.read(@fixtures_dir + "/bank2.dat", %w{v1 v2 v3 v4 v5 v6})
    ds.fields.each do |field|
      ds[field] = ds[field].centered
    end
    cov = ds.covariance_matrix
    pca = Statsample::Factor::PCA.new(cov, :m => 6)
    expected_eigenvalues = [2.985, 0.931, 0.242, 0.194, 0.085, 0.035].to_scale
    assert_similar_vector(expected_eigenvalues, pca.eigenvalues.to_scale, 0.1)

    components = pca.principal_components(ds)
    size = 6
    loadings = pca.component_matrix
    size.times do |pc_index|
      pc_id = "PC_#{pc_index + 1}"
      size.times do |var_index|
        ds_id = "v#{var_index + 1}"
        r = Statsample::Bivariate.correlation(ds[ds_id], components[pc_id])
        assert_in_delta(r, loadings[var_index, pc_index])
      end
    end
  end
  # Compares the pure-Ruby and GSL PCA engines on random correlated data with
  # 3, 5 and 7 variables: eigenvalues must agree and the principal components
  # must match up to sign.
  def test_principalcomponents_ruby_gsl
    # The comparison forces :use_gsl=>true, so it cannot run without GSL.
    skip("Requires GSL") unless Statsample.has_gsl?

    ran = Distribution::Normal.rng
    samples = 20
    [3, 5, 7].each do |k|
      vectors = {}
      vectors["x0"] = samples.times.map { ran.call }.to_scale.centered
      # Each variable mixes fresh noise with the previous variable.
      (1...k).each do |i|
        previous = vectors["x#{i - 1}"]
        vectors["x#{i}"] = samples.times.map { |ii|
          ran.call * 0.5 + previous[ii] * 0.5
        }.to_scale.centered
      end

      ds = vectors.to_dataset
      cm = ds.covariance_matrix
      pca_ruby = Statsample::Factor::PCA.new(cm, :m => k, :use_gsl => false)
      pca_gsl  = Statsample::Factor::PCA.new(cm, :m => k, :use_gsl => true)
      pc_ruby = pca_ruby.principal_components(ds)
      pc_gsl  = pca_gsl.principal_components(ds)

      k.times do |i|
        pc_id = "PC_#{i + 1}"
        assert_in_delta(pca_ruby.eigenvalues[i], pca_gsl.eigenvalues[i], 1e-10)
        # The two engines may return a component with opposite sign: when the
        # first values differ, flip the GSL component before comparing.
        gsl_component = pc_gsl[pc_id]
        if (gsl_component[0] - pc_ruby[pc_id][0]).abs > 1e-6
          gsl_component = gsl_component.recode { |value| -value }
        end
        assert_similar_vector(gsl_component, pc_ruby[pc_id], 1e-6, "PC for #{k} variables")
      end
    end
  end
  # Runs the two-variable PCA check with the pure-Ruby engine and, when GSL
  # is installed, with the GSL engine as well.
  def test_principalcomponents()
    principalcomponents(false)
    # Forcing :use_gsl=>true without GSL cannot work, so only run it if present.
    principalcomponents(true) if Statsample.has_gsl?
  end
  # Helper: PCA of two correlated variables from their correlation matrix.
  # The eigenvalues must be 1+r and 1-r and the eigenvectors (1,1)/sqrt(2)
  # and +/-(1,-1)/sqrt(2); the sign expected for the second one depends on
  # the engine.
  #
  # gsl:: true to use the GSL engine, false for pure Ruby.
  def principalcomponents(gsl)
    ran = Distribution::Normal.rng
    samples = 50
    x1 = samples.times.map { ran.call }.to_scale
    x2 = samples.times.map { |i| ran.call * 0.5 + x1[i] * 0.5 }.to_scale
    ds = { 'x1' => x1, 'x2' => x2 }.to_dataset

    cm = ds.correlation_matrix
    r = cm[0, 1]
    pca = Statsample::Factor::PCA.new(cm, :m => 2, :use_gsl => gsl)
    assert_in_delta(1 + r, pca.eigenvalues[0], 1e-10)
    assert_in_delta(1 - r, pca.eigenvalues[1], 1e-10)

    hs = 1.0 / Math.sqrt(2)
    assert_equal_vector(Vector[1, 1] * hs, pca.eigenvectors[0])
    second_direction = gsl ? Vector[-1, 1] : Vector[1, -1]
    assert_equal_vector(hs * second_direction, pca.eigenvectors[1])

    pcs = pca.principal_components(ds)
    exp_pc_1 = ds.collect_with_index { |row, _i| hs * (row['x1'] + row['x2']) }
    exp_pc_2 = ds.collect_with_index do |row, _i|
      if gsl
        hs * (row['x2'] - row['x1'])
      else
        hs * (row['x1'] - row['x2'])
      end
    end
    assert_similar_vector(exp_pc_1, pcs["PC_1"])
    assert_similar_vector(exp_pc_2, pcs["PC_2"])
  end
  # Anti-image covariance matrix of a 3x3 correlation matrix, compared with
  # reference values within 0.01.
  def test_antiimage
    cor = Matrix[[1, 0.964, 0.312], [0.964, 1, 0.411], [0.312, 0.411, 1]]
    expected = Matrix[
      [0.062, -0.057, 0.074],
      [-0.057, 0.057, -0.089],
      [0.074, -0.089, 0.729]
    ]
    ai = Statsample::Factor.anti_image_covariance_matrix(cor)
    close_enough = Matrix.equal_in_delta?(expected, ai, 0.01)
    assert(close_enough, "#{expected.to_s} not equal to #{ai.to_s}")
  end
  # KMO statistic for a small three-variable dataset and for the harman_817
  # fixture matrix.
  def test_kmo
    @v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70].to_scale
    @v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0].to_scale
    @v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4].to_scale
    # NOTE(review): an earlier remark here said "KMO: 0.490", which disagrees
    # with the 0.667 asserted below — confirm which reference value is right.
    ds = { 'v1' => @v1, 'v2' => @v2, 'v3' => @v3 }.to_dataset
    cor = Statsample::Bivariate.correlation_matrix(ds)
    kmo = Statsample::Factor.kmo(cor)
    assert_in_delta(0.667, kmo, 0.001)
    assert_in_delta(0.81, Statsample::Factor.kmo(harman_817), 0.01)
  end
  # Per-variable KMO for every row of the harman_817 fixture matrix.
  def test_kmo_univariate
    matrix = harman_817
    expected = [0.73, 0.76, 0.84, 0.87, 0.53, 0.93, 0.78, 0.86]
    matrix.row_size.times do |i|
      assert_in_delta(expected[i], Statsample::Factor.kmo_univariate(matrix, i), 0.01)
    end
  end
  # Tested with SPSS and R
  # PCA of two centered variables from their covariance matrix, checked with
  # the pure-Ruby engine and, when available, with the GSL engine.
  def test_pca
    a = [2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
    b = [2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9].to_scale
    a.recode! { |c| c - a.mean }
    b.recode! { |c| c - b.mean }
    ds = { 'a' => a, 'b' => b }.to_dataset
    cov_matrix = Statsample::Bivariate.covariance_matrix(ds)

    # Run the pure-Ruby engine first: skip raises, so anything placed after
    # it would never execute on installations without GSL.
    pca = Statsample::Factor::PCA.new(cov_matrix, :use_gsl => false)
    pca_set(pca, "ruby")

    if Statsample.has_gsl?
      pca = Statsample::Factor::PCA.new(cov_matrix, :use_gsl => true)
      pca_set(pca, "gsl")
    else
      skip("Eigenvalues could be calculated with GSL (requires gsl)")
    end
  end
  # Shared assertions for test_pca: eigenvalues, communalities, the first
  # column of the one-component correlation matrix, and a summary.
  #
  # pca::  the PCA object under test.
  # type:: engine label ("ruby" or "gsl"), included in failure messages so a
  #        failure identifies which engine produced it.
  def pca_set(pca, type)
    engine = "PCA engine: #{type}"

    expected_eigenvalues = [1.284, 0.0490]
    expected_eigenvalues.each_with_index do |ev, i|
      assert_in_delta(ev, pca.eigenvalues[i], 0.001, "#{engine}, eigenvalue #{i}")
    end

    expected_communality = [0.590, 0.694]
    expected_communality.each_with_index do |ev, i|
      assert_in_delta(ev, pca.communalities[i], 0.001, "#{engine}, communality #{i}")
    end

    expected_cm = [0.768, 0.833]
    obs = pca.component_matrix_correlation(1).column(0).to_a
    expected_cm.each_with_index do |ev, i|
      assert_in_delta(ev, obs[i], 0.001, "#{engine}, component matrix row #{i}")
    end

    assert(pca.summary, "#{engine}, summary")
  end

  # Tested with R
  # One-factor principal axis analysis of a 4x4 correlation matrix: checks
  # the component matrix, communalities and first eigenvalue, then expects
  # iterate to raise RuntimeError for an analysis created with :smc=>false.
  def test_principalaxis
    matrix = ::Matrix[
      [1.0, 0.709501601093587, 0.877596585880047, 0.272219316266807],
      [0.709501601093587, 1.0, 0.291633797330304, 0.871141831433844],
      [0.877596585880047, 0.291633797330304, 1.0, -0.213373722977167],
      [0.272219316266807, 0.871141831433844, -0.213373722977167, 1.0]
    ]

    fa = Statsample::Factor::PrincipalAxis.new(matrix, :m => 1, :max_iterations => 50)

    expected_loadings = ::Matrix[[0.923], [0.912], [0.507], [0.483]]
    assert_equal_matrix(expected_loadings, fa.component_matrix, 0.001)

    expected_h2 = [0.852, 0.832, 0.257, 0.233]
    expected_h2.each_with_index do |h2, i|
      assert_in_delta(h2, fa.communalities[i], 0.001)
    end

    first_eigenvalue = 2.175
    assert_in_delta(first_eigenvalue, fa.eigenvalues[0], 0.001)
    assert(fa.summary.size > 0)

    fa = Statsample::Factor::PrincipalAxis.new(matrix, :smc => false)
    assert_raise(RuntimeError) { fa.iterate }
  end


  # Varimax rotation of a 4x3 loading matrix: the rotation must expose its
  # results and the rotated matrix must match the reference within 1e-6.
  def test_rotation_varimax
    loadings = Matrix[
      [0.4320, 0.8129, 0.3872],
      [0.7950, -0.5416, 0.2565],
      [0.5944, 0.7234, -0.3441],
      [0.8945, -0.3921, -0.1863]
    ]
    expected = Matrix[
      [-0.0204423, 0.938674, -0.340334],
      [0.983662, 0.0730206, 0.134997],
      [0.0826106, 0.435975, -0.893379],
      [0.939901, -0.0965213, -0.309596]
    ]

    varimax = Statsample::Factor::Varimax.new(loadings)
    assert(!varimax.rotated.nil?, "Rotated shouldn't be empty")
    assert(!varimax.component_transformation_matrix.nil?, "Component matrix shouldn't be empty")
    assert(!varimax.h2.nil?, "H2 shouldn't be empty")

    assert_equal_matrix(expected, varimax.rotated, 1e-6)
    assert(varimax.summary.size > 0)
  end
  

end


================================================
FILE: test/test_factor_map.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
#require 'rserve'
#require 'statsample/rserve_extension'

# Tests Statsample::Factor::MAP on a fixed 8x8 correlation matrix, once with
# the pure-Ruby engine and once with GSL.
class StatsampleFactorMpaTestCase < MiniTest::Unit::TestCase
  context Statsample::Factor::MAP do
    setup do
      m = Matrix[
        [1, 0.846, 0.805, 0.859, 0.473, 0.398, 0.301, 0.382],
        [0.846, 1, 0.881, 0.826, 0.376, 0.326, 0.277, 0.415],
        [0.805, 0.881, 1, 0.801, 0.38, 0.319, 0.237, 0.345],
        [0.859, 0.826, 0.801, 1, 0.436, 0.329, 0.327, 0.365],
        [0.473, 0.376, 0.38, 0.436, 1, 0.762, 0.73, 0.629],
        [0.398, 0.326, 0.319, 0.329, 0.762, 1, 0.583, 0.577],
        [0.301, 0.277, 0.237, 0.327, 0.73, 0.583, 1, 0.539],
        [0.382, 0.415, 0.345, 0.365, 0.629, 0.577, 0.539, 1]
      ]
      @map = Statsample::Factor::MAP.new(m)
    end
    should "return correct values with pure ruby" do
      @map.use_gsl = false
      map_assertions(@map)
    end
    should_with_gsl "return correct values with gsl" do
      @map.use_gsl = true
      map_assertions(@map)
    end
  end

  # Shared checks on a computed MAP object. Expected values are passed first,
  # as the assertions require, so failure messages label them correctly.
  def map_assertions(map)
    assert_in_delta(0.066445, map.minfm, 0.00001)
    assert_equal(2, map.number_of_factors)
    assert_in_delta(0.312475, map.fm[0], 0.00001)
    assert_in_delta(0.245121, map.fm[1], 0.00001)
  end
end


================================================
FILE: test/test_factor_pa.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
#require 'rserve'
#require 'statsample/rserve_extension'

# Tests for Statsample::Factor::ParallelAnalysis.
class StatsampleFactorTestCase < MiniTest::Unit::TestCase
  include Statsample::Fixtures
  # Based on Hardle and Simar
  def setup
    here = File.dirname(__FILE__)
    @fixtures_dir = File.expand_path(here + "/fixtures")
  end

  # Parallel analysis bootstrapped from a two-factor dataset must give mean
  # eigenvalues close to those obtained from purely random data.
  def test_parallelanalysis_with_data
    skip("Too slow without GSL") unless Statsample.has_gsl?

    samples = 100
    variables = 10
    iterations = 50
    rng = Distribution::Normal.rng
    f1 = samples.times.collect { rng.call }.to_scale
    f2 = samples.times.collect { rng.call }.to_scale
    vectors = {}
    variables.times do |i|
      # The first five variables load mostly on f1, the rest mostly on f2.
      strong, weak = i < 5 ? [f1, f2] : [f2, f1]
      vectors["v#{i}"] = samples.times.collect { |nv|
        strong[nv] * 5 + weak[nv] * 2 + rng.call
      }.to_scale
    end
    ds = vectors.to_dataset

    pa1 = Statsample::Factor::ParallelAnalysis.new(
      ds, :bootstrap_method => :data, :iterations => iterations
    )
    pa2 = Statsample::Factor::ParallelAnalysis.with_random_data(
      samples, variables, :iterations => iterations, :percentil => 95
    )
    3.times do |n|
      var = "ev_0000#{n + 1}"
      assert_in_delta(pa1.ds_eigenvalues[var].mean, pa2.ds_eigenvalues[var].mean, 0.05)
    end
  end

  # Mean of the first three random-data eigenvalues for 305 cases and
  # 8 variables, compared with reference values.
  def test_parallelanalysis
    pa = Statsample::Factor::ParallelAnalysis.with_random_data(
      305, 8, :iterations => 100, :percentil => 95
    )
    references = { 'ev_00001' => 1.2454, 'ev_00002' => 1.1542, 'ev_00003' => 1.0836 }
    references.each do |field, expected_mean|
      assert_in_delta(expected_mean, pa.ds_eigenvalues[field].mean, 0.01)
    end
    assert(pa.summary.size > 0)
  end
end


================================================
FILE: test/test_ggobi.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
require 'ostruct'
# Tests for the GGobi exporter helpers (Statsample::GGobi).
class StatsampleGGobiTestCase < MiniTest::Unit::TestCase

  # Builds a 100-case dataset with one scale, one nominal (partially
  # labelled) and one ordinal vector.
  def setup
    scale = ([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10).to_vector(:scale)
    @v2 = (%w{a b c a a a b b c d} * 10).to_vector(:nominal)
    @v2.labels = { "a" => "letter a", "d" => "letter d" }
    ordinal = ([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10).to_vector(:ordinal)
    @ds = { 'v1' => scale, 'v2' => @v2, 'v3' => ordinal }.to_dataset
  end

  # values_definition joins the values with spaces and writes the given
  # marker in place of nil.
  def test_values_definition
    values = [1.0, 2, "a", nil]
    assert_equal("1.0 2 a NA", Statsample::GGobi.values_definition(values, "NA"))
  end

  # variable_definition renders a nominal vector as a categorical variable
  # and records the value-to-level conversions on the carrier object.
  def test_variable_definition
    carrier = OpenStruct.new
    carrier.categorials = []
    carrier.conversions = {}
    actual = Statsample::GGobi.variable_definition(carrier, @v2, 'variable 2', "v2")
    expected = <<-EOS
<categoricalvariable name="variable 2" nickname="v2">
<levels count="4">
<level value="1">letter a</level>
<level value="2">b</level>
<level value="3">c</level>
<level value="4">letter d</level></levels>
</categoricalvariable>
    EOS
    # Every whitespace character is mapped to a space before comparing.
    assert_equal(expected.gsub(/\s/, " "), actual.gsub(/\s/, " "))
    assert_equal({ 'variable 2' => { 'a' => 1, 'b' => 2, 'c' => 3, 'd' => 4 } }, carrier.conversions)
    assert_equal(['variable 2'], carrier.categorials)
  end
end


================================================
FILE: test/test_gsl.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
# Round trip between a dataset matrix and its GSL counterpart.
class StatsampleGSLTestCase < MiniTest::Unit::TestCase
  # Registered through the should_with_gsl helper, which is defined outside
  # this file.
  should_with_gsl "matrix with gsl" do
    col_a = [1, 2, 3, 4, 20].to_vector(:scale)
    col_b = [3, 2, 3, 4, 50].to_vector(:scale)
    col_c = [6, 2, 3, 4, 3].to_vector(:scale)
    dataset = {'a' => col_a, 'b' => col_b, 'c' => col_c}.to_dataset

    # 5 cases x 3 variables on the GSL side ...
    gsl_matrix = dataset.to_matrix.to_gsl
    assert_equal(5, gsl_matrix.size1)
    assert_equal(3, gsl_matrix.size2)

    # ... and the same shape after converting back.
    round_trip = gsl_matrix.to_matrix
    assert_equal(5, round_trip.row_size)
    assert_equal(3, round_trip.column_size)
  end
end


================================================
FILE: test/test_histogram.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))


# Tests for Statsample::Histogram (allocation, increment, range access and
# summary values) plus a smoke test for Statsample::Graph::Histogram.
class StatsampleHistogramTestCase < MiniTest::Unit::TestCase
  context Statsample::Histogram do
    should "alloc correctly with integer" do
      hist = Statsample::Histogram.alloc(4)
      assert_equal(Array.new(4, 0.0), hist.bin)
      assert_equal(Array.new(5, 0.0), hist.range)
    end

    should "alloc correctly with array" do
      hist = Statsample::Histogram.alloc([1, 3, 7, 9, 20])
      assert_equal(Array.new(4, 0.0), hist.bin)
      assert_equal([1, 3, 7, 9, 20], hist.range)
    end

    should "alloc correctly with integer and min, max array" do
      hist = Statsample::Histogram.alloc(5, [0, 5])
      assert_equal([0.0, 1.0, 2.0, 3.0, 4.0, 5.0], hist.range)
      assert_equal(Array.new(5, 0.0), hist.bin)
    end

    should "bin() method return correct number of bins" do
      hist = Statsample::Histogram.alloc(4)
      assert_equal(4, hist.bins)
    end

    should "increment correctly" do
      hist = Statsample::Histogram.alloc(5, [0, 5])
      # Each row: the argument given to increment, then the bin counts
      # expected afterwards. The last row shows that incrementing with 5
      # leaves the counts unchanged.
      steps = [
        [2.5,                  [0.0, 0.0, 1.0, 0.0, 0.0]],
        [[0.5, 0.5, 3.5, 3.5], [2.0, 0.0, 1.0, 2.0, 0.0]],
        [0,                    [3.0, 0.0, 1.0, 2.0, 0.0]],
        [5,                    [3.0, 0.0, 1.0, 2.0, 0.0]]
      ]
      steps.each do |step|
        hist.increment step[0]
        assert_equal(step[1], hist.bin)
      end
    end

    should "alloc_uniform correctly with n, min,max" do
      hist = Statsample::Histogram.alloc_uniform(5, 0, 10)
      assert_equal(5, hist.bins)
      assert_equal(Array.new(5, 0.0), hist.bin)
      assert_equal([0.0, 2.0, 4.0, 6.0, 8.0, 10.0], hist.range)
    end

    should "alloc_uniform correctly with n, [min,max]" do
      hist = Statsample::Histogram.alloc_uniform(5, [0, 10])
      assert_equal(5, hist.bins)
      assert_equal(Array.new(5, 0.0), hist.bin)
      assert_equal([0.0, 2.0, 4.0, 6.0, 8.0, 10.0], hist.range)
    end

    should "get_range()" do
      hist = Statsample::Histogram.alloc_uniform(5, 2, 12)
      5.times do |index|
        lower = 2 + index * 2
        assert_equal([lower, lower + 2], hist.get_range(index))
      end
    end

    should "min() and max()" do
      hist = Statsample::Histogram.alloc_uniform(5, 2, 12)
      assert_equal(2, hist.min)
      assert_equal(12, hist.max)
    end

    should "max_val()" do
      hist = Statsample::Histogram.alloc(5, [0, 5])
      100.times { hist.increment(rand * 5) }
      # Largest count among the five bins, read bin by bin.
      highest = (0..4).map { |index| hist.bin[index] }.max
      assert_equal(highest, hist.max_val)
    end

    should "min_val()" do
      hist = Statsample::Histogram.alloc(5, [0, 5])
      100.times { hist.increment(rand * 5) }
      # Smallest count among the five bins, read bin by bin.
      lowest = (0..4).map { |index| hist.bin[index] }.min
      assert_equal(lowest, hist.min_val)
    end

    should "return correct estimated mean" do
      values = [1.5, 1.5, 1.5, 3.5, 3.5, 3.5].to_scale
      hist = Statsample::Histogram.alloc(5, [0, 5])
      hist.increment(values)
      assert_equal(2.5, hist.estimated_mean)
    end

    should "return correct estimated standard deviation" do
      values = [0.5, 1.5, 1.5, 1.5, 2.5, 3.5, 3.5, 3.5, 4.5].to_scale
      hist = Statsample::Histogram.alloc(5, [0, 5])
      hist.increment(values)
      assert_equal(values.sd, hist.estimated_standard_deviation)
    end

    should "return correct sum for all values" do
      hist = Statsample::Histogram.alloc(5, [0, 5])
      count = rand(100)
      count.times { hist.increment(1) }
      assert_equal(count, hist.sum)
    end

    should "return correct sum for a subset of values" do
      hist = Statsample::Histogram.alloc(5, [0, 5])
      hist.increment([0.5, 2.5, 4.5])
      assert_equal(1, hist.sum(0, 1))
      assert_equal(2, hist.sum(1, 4))
    end

    should "not raise exception when all values equal" do
      assert_nothing_raised do
        constant = [5, 5, 5, 5, 5, 5].to_scale
        graph = Statsample::Graph::Histogram.new(constant)
        graph.to_svg
      end
    end
  end
end


================================================
FILE: test/test_matrix.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))

# Tests for the Matrix extensions Statsample::NamedMatrix and
# Statsample::CovariateMatrix.
class StatsampleMatrixTestCase < MiniTest::Unit::TestCase

  # A named matrix converted with to_dataset must yield the same vectors as
  # a dataset built by hand from the matrix columns.
  def test_to_dataset
    m=Matrix[[1,4],[2,5],[3,6]]
    m.extend Statsample::NamedMatrix
    m.fields_y=%w{x1 x2}
    m.name="test"
    x1=[1,2,3].to_scale
    x2=[4,5,6].to_scale
    ds={'x1'=>x1,'x2'=>x2}.to_dataset
    ds.name="test"
    obs=m.to_dataset
    assert_equal(ds['x1'],obs['x1'])
    assert_equal(ds['x2'],obs['x2'])
    assert_equal(ds['x1'].mean,obs['x1'].mean)
  end

  # Checks type detection (_type), submatrix extraction by field name, and
  # that the correlation derived from a covariance matrix agrees with the
  # directly computed correlation matrix.
  def test_covariate
    # Unit diagonal: reported as a correlation matrix.
    correlation=Matrix[[1.0, 0.3, 0.2], [0.3, 1.0, 0.5], [0.2, 0.5, 1.0]]
    correlation.extend Statsample::CovariateMatrix
    correlation.fields=%w{a b c}
    assert_equal(:correlation, correlation._type)

    # Rows follow the first field list, columns the second one.
    assert_equal(Matrix[[0.5],[0.3]], correlation.submatrix(%w{c a}, %w{b}))
    assert_equal(Matrix[[1.0, 0.2] , [0.2, 1.0]], correlation.submatrix(%w{c a}))
    assert_equal(:correlation, correlation.submatrix(%w{c a})._type)

    # Diagonal different from 1: reported as a covariance matrix.
    covariance=Matrix[[20,30,10], [30,60,50], [10,50,50]]
    covariance.extend Statsample::CovariateMatrix
    assert_equal(:covariance, covariance._type)

    a=50.times.collect {rand()}.to_scale
    b=50.times.collect {rand()}.to_scale
    c=50.times.collect {rand()}.to_scale
    ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
    corr=Statsample::Bivariate.correlation_matrix(ds)
    real=Statsample::Bivariate.covariance_matrix(ds).correlation
    corr.row_size.times do |i|
      corr.column_size.times do |j|
        assert_in_delta(corr[i,j], real[i,j],1e-15)
      end
    end
  end
end


================================================
FILE: test/test_multiset.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))


# Tests for Statsample::Multiset, Dataset#to_multiset_by_split and
# Statsample::StratifiedSample.
class StatsampleMultisetTestCase < MiniTest::Unit::TestCase
  # Splits an 8-case dataset on 'x'; the resulting multiset is used by
  # test_each and the union tests.
  def setup
    @x=%w{a a a a b b b b}.to_vector
    @y=[1,2,3,4,5,6,7,8].to_scale
    @z=[10,11,12,13,14,15,16,17].to_scale
    @ds={'x'=>@x,'y'=>@y,'z'=>@z}.to_dataset
    @ms=@ds.to_multiset_by_split('x')
  end

  # Datasets added under a key must be retrievable under that key, and a
  # dataset with a different field list must be refused.
  def test_creation
    v1a=[1,2,3,4,5].to_vector
    v2a=[11,21,31,41,51].to_vector
    v3a=[21,23,34,45,56].to_vector
    ds1={'v1'=>v1a,'v2'=>v2a,'v3'=>v3a}.to_dataset
    v1b=[15,25,35,45,55].to_vector
    v2b=[11,21,31,41,51].to_vector
    v3b=[21,23,34,45,56].to_vector
    ds2={'v1'=>v1b,'v2'=>v2b,'v3'=>v3b}.to_dataset
    ms=Statsample::Multiset.new(['v1','v2','v3'])
    ms.add_dataset('ds1',ds1)
    ms.add_dataset('ds2',ds2)
    assert_equal(ds1,ms['ds1'])
    assert_equal(ds2,ms['ds2'])
    assert_equal(v1a,ms['ds1']['v1'])
    assert_not_equal(v1b,ms['ds1']['v1'])
    # ds3 has no 'v3'. The key is passed as well, so that the ArgumentError
    # has to come from the dataset being refused and not from calling
    # add_dataset with the wrong number of arguments.
    ds3={'v1'=>v1b,'v2'=>v2b}.to_dataset
    assert_raise ArgumentError do
      ms.add_dataset('ds3',ds3)
    end
  end

  # new_empty_vectors must be equivalent to adding one empty dataset per key
  # by hand.
  def test_creation_empty
    ms=Statsample::Multiset.new_empty_vectors(%w{id age name},%w{male female})
    ds_male={'id'=>[].to_vector,'age'=>[].to_vector, 'name'=>[].to_vector}.to_dataset(%w{id age name})
    ds_female={'id'=>[].to_vector,'age'=>[].to_vector, 'name'=>[].to_vector}.to_dataset(%w{id age name})
    ms2=Statsample::Multiset.new(%w{id age name})
    ms2.add_dataset('male',ds_male)
    ms2.add_dataset('female',ds_female)
    assert_equal(ms2.fields,ms.fields)
    assert_equal(ms2['male'],ms['male'])
    assert_equal(ms2['female'],ms['female'])
  end

  # Splitting on a single field yields one dataset per distinct value, each
  # holding the matching cases in their original order.
  def test_to_multiset_by_split_one
    sex=%w{m m m m m f f f f m}.to_vector(:nominal)
    city=%w{London Paris NY London Paris NY London Paris NY Tome}.to_vector(:nominal)
    age=[10,10,20,30,34,34,33,35,36,40].to_vector(:scale)
    ds={'sex'=>sex,'city'=>city,'age'=>age}.to_dataset
    ms=ds.to_multiset_by_split('sex')
    assert_equal(2,ms.n_datasets)
    assert_equal(%w{f m},ms.datasets.keys.sort)
    assert_equal(6,ms['m'].cases)
    assert_equal(4,ms['f'].cases)
    assert_equal(%w{London Paris NY London Paris Tome},ms['m']['city'].to_a)
    assert_equal([34,33,35,36],ms['f']['age'].to_a)
  end

  # Splitting on three two-valued fields yields 2*2*2 datasets, addressed by
  # the array of the three values.
  def test_to_multiset_by_split_multiple
    sex=%w{m m m m m m m m m m f f f f f f f f f f}.to_vector(:nominal)
    city=%w{London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris}.to_vector(:nominal)
    hair=%w{blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black}.to_vector(:nominal)
    age=[10,10,20,30,34,34,33,35,36,40, 10,10,20,30,34,34,33,35,36,40].to_vector(:scale)
    ds={'sex'=>sex,'city'=>city,'hair'=>hair,'age'=>age}.to_dataset(%w{sex city hair age})
    ms=ds.to_multiset_by_split('sex','city','hair')
    assert_equal(8,ms.n_datasets)
    # Cases 0, 1 and 5 of the fixture.
    assert_equal(3,ms[%w{m London blonde}].cases)
    # Cases 12, 16 and 17 of the fixture.
    assert_equal(3,ms[%w{f London blonde}].cases)
    # Case 3 of the fixture.
    assert_equal(1,ms[%w{m Paris black}].cases)
  end

  # Proportion of a value over a stratified sample built from two strata
  # with stratum sizes 50 and 100.
  def test_stratum_proportion
    ds1={'q1'=>[1,1,1,1,1,0,0,0,0,0,0,0].to_vector}.to_dataset
    ds2={'q1'=>[1,1,1,1,1,1,1,0,0].to_vector}.to_dataset
    assert_equal(5.0/12, ds1['q1'].proportion )
    assert_equal(7.0/9, ds2['q1'].proportion )
    ms=Statsample::Multiset.new(['q1'])
    ms.add_dataset('d1',ds1)
    ms.add_dataset('d2',ds2)
    ss=Statsample::StratifiedSample.new(ms,{'d1'=>50,'d2'=>100})
    assert_in_delta(0.655, ss.proportion('q1'),0.01)
    assert_in_delta(0.345, ss.proportion('q1',0),0.01)
  end

  # Sizes, mean and standard error of a stratified sample with two strata of
  # 18 cases each and a stratum size of 10000 for both.
  def test_stratum_scale
    boys={'test'=>[50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:scale)}.to_dataset
    girls={'test'=>[70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:scale)}.to_dataset
    ms=Statsample::Multiset.new(['test'])
    ms.add_dataset('boys',boys)
    ms.add_dataset('girls',girls)
    ss=Statsample::StratifiedSample.new(ms,{'boys'=>10000,'girls'=>10000})
    assert_equal(2,ss.strata_number)
    assert_equal(20000,ss.population_size)
    assert_equal(10000,ss.stratum_size('boys'))
    assert_equal(10000,ss.stratum_size('girls'))
    assert_equal(36,ss.sample_size)
    assert_equal(75,ss.mean('test'))
    assert_in_delta(1.45,ss.standard_error_wor('test'),0.01)
    # Both standard error implementations must agree.
    assert_in_delta(ss.standard_error_wor('test'), ss.standard_error_wor_2('test'),0.00001)
  end

  # Multiset#each must yield every key together with its dataset.
  def test_each
    xpe={
      'a'=>%w{a a a a}.to_vector,
      'b'=>%w{b b b b}.to_vector
    }
    ype={
      'a'=>[1,2,3,4].to_scale,
      'b'=>[5,6,7,8].to_scale,
    }
    zpe={
      'a'=>[10,11,12,13].to_scale,
      'b'=>[14,15,16,17].to_scale,
    }
    xp,yp,zp={},{},{}
    @ms.each {|k,ds|
      xp[k]=ds['x']
      yp[k]=ds['y']
      zp[k]=ds['z']
    }
    assert_equal(xpe,xp)
    assert_equal(ype,yp)
    assert_equal(zpe,zp)
  end

  # union with a block: every dataset is recoded inside the block (factor r1
  # for key 'a', r2 otherwise) and the returned dataset must contain the
  # recoded values in the original case order.
  def test_multiset_union_with_block
    r1=rand()
    r2=rand()
    ye=[1*r1,2*r1,3*r1,4*r1,5*r2,6*r2,7*r2,8*r2].to_scale

    ze=[10*r1,11*r1,12*r1,13*r1, 14*r2,15*r2,16*r2,17*r2].to_scale

    ds2=@ms.union {|k,ds|
      ds['y'].recode!{|v|
      k=='a' ? v*r1 : v*r2}
      ds['z'].recode!{|v|
      k=='a' ? v*r1 : v*r2}
    }
    assert_equal(ye,ds2['y'])
    assert_equal(ze,ds2['z'])
  end

  # union without a block: the datasets are recoded beforehand through each
  # and union must then return the recoded values.
  def test_multiset_union
    r1=rand()
    r2=rand()
    ye=[1*r1,2*r1,3*r1,4*r1,5*r2,6*r2,7*r2,8*r2].to_scale

    ze=[10*r1,11*r1,12*r1,13*r1, 14*r2,15*r2,16*r2,17*r2].to_scale
    @ms.each {|k,ds|
      ds['y'].recode!{|v|
      k=='a' ? v*r1 : v*r2}
      ds['z'].recode!{|v|
      k=='a' ? v*r1 : v*r2}
    }
    ds2=@ms.union
    assert_equal(ye,ds2['y'])
    assert_equal(ze,ds2['z'])
  end
end


================================================
FILE: test/test_regression.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))

# Tests for Statsample::Regression: the Simple model and the multiple
# regression engines (RubyEngine, GslEngine, MatrixEngine, MultipleDependent).
class StatsampleRegressionTestCase < MiniTest::Unit::TestCase
  context "Example with missing data" do
    setup do
      @x = [0.285714285714286, 0.114285714285714, 0.314285714285714, 0.2, 0.2, 0.228571428571429, 0.2, 0.4, 0.714285714285714, 0.285714285714286, 0.285714285714286, 0.228571428571429, 0.485714285714286, 0.457142857142857, 0.257142857142857, 0.228571428571429, 0.285714285714286, 0.285714285714286, 0.285714285714286, 0.142857142857143, 0.285714285714286, 0.514285714285714, 0.485714285714286, 0.228571428571429, 0.285714285714286, 0.342857142857143, 0.285714285714286, 0.0857142857142857].to_scale

      # The dependent variable carries the missing (nil) values.
      @y = [nil, 0.233333333333333, nil, 0.266666666666667, 0.366666666666667, nil, 0.333333333333333, 0.3, 0.666666666666667, 0.0333333333333333, 0.333333333333333, nil, nil, 0.533333333333333, 0.433333333333333, 0.4, 0.4, 0.5, 0.4, 0.266666666666667, 0.166666666666667, 0.666666666666667, 0.433333333333333, 0.166666666666667, nil, 0.4, 0.366666666666667, nil].to_scale
      @ds = {'x' => @x, 'y' => @y}.to_dataset
      @lr = Statsample::Regression::Multiple::RubyEngine.new(@ds, 'y')
    end

    should "have correct values" do
      assert_in_delta(0.455, @lr.r2, 0.001)
      assert_in_delta(0.427, @lr.r2_adjusted, 0.001)
      assert_in_delta(0.1165, @lr.se_estimate, 0.001)
      assert_in_delta(15.925, @lr.f, 0.0001)
      assert_in_delta(0.675, @lr.standarized_coeffs['x'], 0.001)
      assert_in_delta(0.778, @lr.coeffs['x'], 0.001, "coeff x")
      assert_in_delta(0.132, @lr.constant, 0.001, "constant")
      assert_in_delta(0.195, @lr.coeffs_se['x'], 0.001, "coeff x se")
      assert_in_delta(0.064, @lr.constant_se, 0.001, "constant se")
    end
  end

  should "return an error if data is linearly dependent" do
    n_cases = 100

    weight_1, weight_2 = rand, rand

    x1 = Array.new(n_cases) { rand }.to_scale
    x2 = Array.new(n_cases) { rand }.to_scale
    # x3 is an exact linear combination of x1 and x2.
    x3 = Array.new(n_cases) { |i| x1[i] * (1 + weight_1) + x2[i] * (1 + weight_2) }.to_scale
    y = Array.new(n_cases) { |i| x1[i] + x2[i] + x3[i] + rand }.to_scale

    dataset = {'x1' => x1, 'x2' => x2, 'x3' => x3, 'y' => y}.to_dataset

    assert_raise(Statsample::Regression::LinearDependency) do
      Statsample::Regression::Multiple::RubyEngine.new(dataset, 'y')
    end
  end

  # The three ways of building a simple regression must give the same model.
  def test_parameters
    @x = [13, 20, 10, 33, 15].to_vector(:scale)
    @y = [23, 18, 35, 10, 27].to_vector(:scale)
    from_vectors = Statsample::Regression::Simple.new_from_vectors(@x, @y)
    _test_simple_regression(from_vectors)
    dataset = {'x' => @x, 'y' => @y}.to_dataset
    from_dataset = Statsample::Regression::Simple.new_from_dataset(dataset, 'x', 'y')
    _test_simple_regression(from_dataset)
    from_shortcut = Statsample::Regression.simple(@x, @y)
    _test_simple_regression(from_shortcut)
  end

  # Shared expectations for the simple regression built in test_parameters.
  def _test_simple_regression(reg)
    assert_in_delta(40.009, reg.a, 0.001)
    assert_in_delta(-0.957, reg.b, 0.001)
    assert_in_delta(4.248, reg.standard_error, 0.002)
    assert(reg.summary)
  end

  # A summary must be produced for arbitrary (random) data.
  def test_summaries
    predictor_a = Array.new(10) { rand(100) }.to_scale
    predictor_b = Array.new(10) { rand(100) }.to_scale
    outcome = Array.new(10) { rand(100) }.to_scale
    dataset = {'a' => predictor_a, 'b' => predictor_b, 'y' => outcome}.to_dataset
    engine = Statsample::Regression::Multiple::RubyEngine.new(dataset, 'y')
    assert(engine.summary.size > 0)
  end

  # Regression with several dependent variables, computed from a 9x9
  # correlation matrix.
  def test_multiple_dependent
    complete = Matrix[
      [1, 0.53, 0.62, 0.19, -0.09, 0.08, 0.02, -0.12, 0.08],
      [0.53, 1, 0.61, 0.23, 0.1, 0.18, 0.02, -0.1, 0.15],
      [0.62, 0.61, 1, 0.03, 0.1, 0.12, 0.03, -0.06, 0.12],
      [0.19, 0.23, 0.03, 1, -0.02, 0.02, 0, -0.02, -0.02],
      [-0.09, 0.1, 0.1, -0.02, 1, 0.05, 0.06, 0.18, 0.02],
      [0.08, 0.18, 0.12, 0.02, 0.05, 1, 0.22, -0.07, 0.36],
      [0.02, 0.02, 0.03, 0, 0.06, 0.22, 1, -0.01, -0.05],
      [-0.12, -0.1, -0.06, -0.02, 0.18, -0.07, -0.01, 1, -0.03],
      [0.08, 0.15, 0.12, -0.02, 0.02, 0.36, -0.05, -0.03, 1]
    ]
    complete.extend Statsample::CovariateMatrix
    complete.fields = %w{adhd cd odd sex age monly mwork mage poverty}

    model = Statsample::Regression::Multiple::MultipleDependent.new(complete, %w{adhd cd odd})

    assert_in_delta(0.197, model.r2yx, 0.001)
    assert_in_delta(0.197, model.r2yx_covariance, 0.001)
    assert_in_delta(0.07, model.p2yx, 0.001)
  end

  # RubyEngine on a dataset where every variable has missing values.
  def test_multiple_regression_pairwise_2
    @a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3].to_vector(:scale)
    @b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2].to_vector(:scale)
    @c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7].to_vector(:scale)
    @y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil].to_vector(:scale)
    dataset = {'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y}.to_dataset
    engine = Statsample::Regression::Multiple::RubyEngine.new(dataset, 'y')
    assert_in_delta(2407.436, engine.sst, 0.001)
    assert_in_delta(0.752, engine.r, 0.001, "pairwise r")
    assert_in_delta(0.565, engine.r2, 0.001)
    assert_in_delta(1361.130, engine.ssr, 0.001)
    assert_in_delta(1046.306, engine.sse, 0.001)
    assert_in_delta(3.035, engine.f, 0.001)
  end


  # GslEngine against the shared model expectations; skipped without GSL.
  def test_multiple_regression_gsl
    skip "Regression::Multiple::GslEngine not tested (no Gsl)" unless Statsample.has_gsl?

    @a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:scale)
    @b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:scale)
    @c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:scale)
    @y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:scale)
    dataset = {'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y}.to_dataset
    engine = Statsample::Regression::Multiple::GslEngine.new(dataset, 'y')
    assert(engine.summary.size > 0)
    model_test(engine, 'gsl')

    expected_predicted = [1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
    computed_predicted = engine.predicted
    expected_predicted.each_with_index do |expected, i|
      assert_in_delta(expected, computed_predicted[i], 0.001)
    end

    expected_residuals = [1.2142, -2.0989, 1.7566, -1.29085, 2.033, -2.3428, 0.18414, -0.47177, -3.66395, 4.6801]
    computed_residuals = engine.residuals
    expected_residuals.each_with_index do |expected, i|
      assert_in_delta(expected, computed_residuals[i], 0.001)
    end
  end


  # Expectations shared by every engine for the a/b/c -> y example.
  # +name+ only labels the assertion messages.
  def model_test_matrix(lr, name = 'undefined')
    expected_beta = {'a' => 0.151, 'b' => -0.547, 'c' => 0.997}
    expected_b = {'a' => 0.695, 'b' => -4.286, 'c' => 0.266}

    expected_b.each do |field, value|
      assert_in_delta(value, lr.coeffs[field], 0.001, "b coeffs - #{name}")
    end

    expected_beta.each do |field, value|
      assert_in_delta(value, lr.standarized_coeffs[field], 0.001, "beta coeffs - #{name}")
    end

    assert_in_delta(11.027, lr.constant, 0.001)

    assert_in_delta(0.955, lr.r, 0.001)
    assert_in_delta(0.913, lr.r2, 0.001)

    assert_in_delta(20.908, lr.f, 0.001)
    assert_in_delta(0.001, lr.probability, 0.001)
    assert_in_delta(0.226, lr.tolerance("a"), 0.001)

    expected_se = {"a" => 1.171, "b" => 1.129, "c" => 0.072}
    computed_se = lr.coeffs_se
    expected_se.each do |field, value|
      assert_in_delta(value, computed_se[field], 0.001)
    end

    expected_t = {"a" => 0.594, "b" => -3.796, "c" => 3.703}
    computed_t = lr.coeffs_t
    expected_t.each do |field, value|
      assert_in_delta(value, computed_t[field], 0.001)
    end

    assert_in_delta(639.6, lr.sst, 0.001)
    assert_in_delta(583.76, lr.ssr, 0.001)
    assert_in_delta(55.840, lr.sse, 0.001)
    assert(lr.summary.size > 0, "#{name} without summary")
  end

  # model_test_matrix plus the expectations about the constant and about
  # processing a single case.
  def model_test(lr, name = 'undefined')
    model_test_matrix(lr, name)
    assert_in_delta(4.559, lr.constant_se, 0.001)
    assert_in_delta(2.419, lr.constant_t, 0.001)

    assert_in_delta(1.785, lr.process([1, 3, 11]), 0.001)
  end

  # MatrixEngine fed first with a correlation matrix (constant_se and
  # constant_t are nil there) and then with a covariance matrix.
  def test_regression_matrix
    @a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:scale)
    @b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:scale)
    @c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:scale)
    @y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:scale)
    dataset = {'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y}.to_dataset
    correlation = Statsample::Bivariate.correlation_matrix(dataset)

    engine = Statsample::Regression::Multiple::MatrixEngine.new(
      correlation, 'y',
      :y_mean => @y.mean,
      :x_mean => {'a' => dataset['a'].mean, 'b' => dataset['b'].mean, 'c' => dataset['c'].mean},
      :cases => @a.size,
      :y_sd => @y.sd,
      :x_sd => {'a' => @a.sd, 'b' => @b.sd, 'c' => @c.sd}
    )
    assert_nil(engine.constant_se)
    assert_nil(engine.constant_t)
    model_test_matrix(engine, "correlation matrix")

    covariance = Statsample::Bivariate.covariance_matrix(dataset)
    engine = Statsample::Regression::Multiple::MatrixEngine.new(
      covariance, 'y',
      :y_mean => @y.mean,
      :x_mean => {'a' => dataset['a'].mean, 'b' => dataset['b'].mean, 'c' => dataset['c'].mean},
      :cases => @a.size
    )
    assert(engine.summary.size > 0)

    model_test(engine, "covariance matrix")
  end

  # RubyEngine with one fully missing case: the case counts, the shared
  # model expectations, and nil predicted/residual values for that case.
  def test_regression_rubyengine
    @a = [nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:scale)
    @b = [nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:scale)
    @c = [nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:scale)
    @y = [nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:scale)
    dataset = {'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y}.to_dataset
    engine = Statsample::Regression::Multiple::RubyEngine.new(dataset, 'y')
    assert_equal(11, engine.total_cases)
    assert_equal(10, engine.valid_cases)
    model_test(engine, 'rubyengine with missing data')

    expected_predicted = [nil, 1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
    computed_predicted = engine.predicted
    expected_predicted.each_with_index do |expected, i|
      actual = computed_predicted[i]
      if actual.nil?
        assert(expected.nil?, "Actual #{i} is nil, but expected #{expected}")
      else
        assert_in_delta(expected, actual, 0.001)
      end
    end

    expected_residuals = [nil, 1.2142, -2.0989, 1.7566, -1.29085, 2.033, -2.3428, 0.18414, -0.47177, -3.66395, 4.6801]
    computed_residuals = engine.residuals
    expected_residuals.each_with_index do |expected, i|
      actual = computed_residuals[i]
      if actual.nil?
        assert(expected.nil?)
      else
        assert_in_delta(expected, actual, 0.001)
      end
    end
  end
end


================================================
FILE: test/test_reliability.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
# Tests for Statsample::Reliability: Spearman-Brown helpers, Cronbach's
# alpha, ItemCharacteristicCurve, MultiScaleAnalysis and ScaleAnalysis.
class StatsampleReliabilityTestCase < MiniTest::Unit::TestCase
  context Statsample::Reliability do
    should "return correct r according to Spearman-Brown prophecy" do
      r=0.6849
      n=62.quo(15)
      assert_in_delta(0.9, Statsample::Reliability.sbp(r,n), 0.001)
    end
    should "return correct n for desired reliability" do
      r=0.6849
      r_d=0.9
      assert_in_delta(62, Statsample::Reliability.n_for_desired_reliability(r, r_d, 15),0.5)
    end
    context "Cronbach's alpha" do
      # Random dataset of 2..11 variables, each one a shared random base
      # vector plus random noise, together with its standardized copy and
      # both covariance matrices.
      setup do
        @samples=40
        @n_variables=rand(10)+2
        @ds=Statsample::Dataset.new()
        base=@samples.times.collect {|a| rand()}.to_scale
        @n_variables.times do |i|
          @ds[i]=base.collect {|v| v+rand()}.to_scale
        end

        @ds.update_valid_data
        @k=@ds.fields.size
        @cm=Statsample::Bivariate.covariance_matrix(@ds)
        @dse=@ds.dup
        @dse.fields.each do |f|
          @dse[f]=@dse[f].standarized
        end
        @cme=Statsample::Bivariate.covariance_matrix(@dse)
        @a=Statsample::Reliability.cronbach_alpha(@ds)
        @as=Statsample::Reliability.cronbach_alpha_standarized(@ds)
      end
      should "alpha will be equal to sum of matrix covariance less the individual variances" do
        total_sum=@cm.total_sum
        ind_var=@ds.fields.inject(0) {|ac,v| ac+@ds[v].variance}
        # alpha = k/(k-1) * (1 - sum of item variances / sum of covariance matrix)
        expected = @k.quo(@k-1) * (1-(ind_var.quo(total_sum)))
        assert_in_delta(expected, @a,1e-10)
      end
      should "method cronbach_alpha_from_n_s2_cov return correct values" do
        sa=Statsample::Reliability::ScaleAnalysis.new(@ds)
        vm, cm = sa.variances_mean, sa.covariances_mean
        assert_in_delta(sa.alpha, Statsample::Reliability.cronbach_alpha_from_n_s2_cov(@n_variables, vm,cm), 1e-10)
      end
      should "method cronbach_alpha_from_covariance_matrix returns correct value" do
        cov=Statsample::Bivariate.covariance_matrix(@ds)
        assert_in_delta(@a, Statsample::Reliability.cronbach_alpha_from_covariance_matrix(cov),0.0000001)
      end
      should "return correct n for desired alpha, covariance and variance" do
        sa=Statsample::Reliability::ScaleAnalysis.new(@ds)
        vm, cm = sa.variances_mean, sa.covariances_mean
        n_obtained=Statsample::Reliability.n_for_desired_alpha(@a, vm,cm)
        # Round trip: feeding the obtained n back must reproduce alpha.
        assert_in_delta(Statsample::Reliability.cronbach_alpha_from_n_s2_cov(n_obtained, vm,cm) ,@a,0.001)
      end

      should "standarized alpha will be equal to sum of matrix covariance less the individual variances on standarized values" do
        total_sum=@cme.total_sum
        ind_var=@dse.fields.inject(0) {|ac,v| ac+@dse[v].variance}
        expected = @k.quo(@k-1) * (1-(ind_var.quo(total_sum)))
        assert_in_delta(expected, @as, 1e-10)
      end
    end
    context Statsample::Reliability::ItemCharacteristicCurve do
      # Three random items scored 0..(@points-1); @max_point is the highest
      # total that the three items can add up to.
      setup do
        @samples=100
        @points=rand(10)+3
        @max_point=(@points-1)*3
        @x1=@samples.times.map{rand(@points)}.to_scale
        @x2=@samples.times.map{rand(@points)}.to_scale
        @x3=@samples.times.map{rand(@points)}.to_scale
        @ds={'a'=>@x1,'b'=>@x2,'c'=>@x3}.to_dataset
        @icc=Statsample::Reliability::ItemCharacteristicCurve.new(@ds)
      end
      should "have a correct automatic vector_total" do
        assert_equal(@ds.vector_sum, @icc.vector_total)
      end
      should "have a correct different vector_total" do
        x2=@samples.times.map{rand(10)}.to_scale
        @icc=Statsample::Reliability::ItemCharacteristicCurve.new(@ds,x2)
        assert_equal(x2, @icc.vector_total)
        # A total vector of a different length must be refused.
        assert_raises(ArgumentError) do
          inc=(@samples+10).times.map{rand(10)}.to_scale
          @icc=Statsample::Reliability::ItemCharacteristicCurve.new(@ds,inc)
        end
      end
      should "have 0% for 0 points on maximum value values" do
        max=@icc.curve_field('a',0)[@max_point.to_f]
        # The curve may have no entry for that total at all.
        max||=0
        assert_in_delta(0, max)
      end
      should "have 0 for max value on minimum value" do
        max=@icc.curve_field('a',@max_point)[0.0]
        max||=0
        assert_in_delta(0, max)
      end
      should "have correct values of % for any value" do
        sum=@icc.vector_total
        total={}
        total_g=sum.frequencies
        index=rand(@points)
        # For every observed total, count the cases where item 'a' equals
        # the randomly chosen score.
        @x1.each_with_index do |v,i|
          total[sum[i]]||=0
          total[sum[i]]+=1 if v==index
        end
        # Turn the counts into proportions of each total's frequency,
        # collecting them in a new hash rather than rewriting the one being
        # iterated.
        expected=Hash[total.map {|k,v| [k, v.quo(total_g[k])]}]
        assert_equal(expected, @icc.curve_field('a',index))
      end
    end

    context Statsample::Reliability::MultiScaleAnalysis do
      # Three scales of ten items each; the items of scale s are named
      # "s_i" and take values in [2s, 2s+1). The analysis holds the complete
      # item set plus one entry per scale.
      setup do
        size=100
        @scales=3
        @items_per_scale=10
        h={}
        @scales.times {|s|
          @items_per_scale.times {|i|
            h["#{s}_#{i}"] = (size.times.map {(s*2)+rand}).to_scale
          }
        }
        @ds=h.to_dataset
        @msa=Statsample::Reliability::MultiScaleAnalysis.new(:name=>'Multiple Analysis') do |m|
          m.scale "complete", @ds
          @scales.times {|s|
            m.scale "scale_#{s}", @ds.clone(@items_per_scale.times.map {|i| "#{s}_#{i}"}), {:name=>"Scale #{s}"}
          }
        end
      end
      should "Retrieve correct ScaleAnalysis for whole scale" do
        sa=Statsample::Reliability::ScaleAnalysis.new(@ds, :name=>"Scale complete")
        assert_equal(sa.variances_mean, @msa.scale("complete").variances_mean)
      end
      should "Retrieve correct ScaleAnalysis for each scale" do
        @scales.times {|s|
          sa=Statsample::Reliability::ScaleAnalysis.new(@ds.dup(@items_per_scale.times.map {|i| "#{s}_#{i}"}), :name=>"Scale #{s}")
          assert_equal(sa.variances_mean,@msa.scale("scale_#{s}").variances_mean)
        }
      end
      should "retrieve correct correlation matrix for each scale" do
        vectors={'complete' => @ds.vector_sum}
        @scales.times {|s|
         vectors["scale_#{s}"]=@ds.dup(@items_per_scale.times.map {|i| "#{s}_#{i}"}).vector_sum
        }
        ds2=vectors.to_dataset
        assert_equal(Statsample::Bivariate.correlation_matrix(ds2), @msa.correlation_matrix)
      end
      should "delete scale using delete_scale" do
        @msa.delete_scale("complete")
        # Expected value first, so a failure message labels both sides
        # correctly.
        assert_equal(@scales.times.map {|s| "scale_#{s}"}, @msa.scales.keys.sort)
      end
      should "retrieve pca for scales" do
        @msa.delete_scale("complete")
        vectors=Hash.new
        @scales.times {|s|
         vectors["scale_#{s}"]=@ds.dup(@items_per_scale.times.map {|i| "#{s}_#{i}"}).vector_sum
        }
        ds2=vectors.to_dataset
        cor_matrix=Statsample::Bivariate.correlation_matrix(ds2)
        m=3
        pca=Statsample::Factor::PCA.new(cor_matrix, :m=>m)
        assert_equal(pca.component_matrix, @msa.pca(:m=>m).component_matrix)
      end
      should "retrieve acceptable summary" do
        # Only the "complete" scale is left for the summary.
        @msa.delete_scale("scale_0")
        @msa.delete_scale("scale_1")
        @msa.delete_scale("scale_2")


        #@msa.summary_correlation_matrix=true
        #@msa.summary_pca=true


        assert(@msa.summary.size>0)
      end
    end
    context Statsample::Reliability::ScaleAnalysis do
      # Four items of twelve cases; the last case is far larger than the
      # others on every item.
      setup do
        @x1=[1,1,1,1,2,2,2,2,3,3,3,30].to_scale
        @x2=[1,1,1,2,2,3,3,3,3,4,4,50].to_scale
        @x3=[2,2,1,1,1,2,2,2,3,4,5,40].to_scale
        @x4=[1,2,3,4,4,4,4,3,4,4,5,30].to_scale
        @ds={'x1'=>@x1,'x2'=>@x2,'x3'=>@x3,'x4'=>@x4}.to_dataset
        @ia=Statsample::Reliability::ScaleAnalysis.new(@ds)
        @cov_matrix=@ia.cov_m
      end
      should "return correct values for item analysis" do
        assert_in_delta(0.980,@ia.alpha,0.001)
        assert_in_delta(0.999,@ia.alpha_standarized,0.001)
        # Mean of the diagonal of the covariance matrix.
        var_mean=4.times.map{|m| @cov_matrix[m,m]}.to_scale.mean
        assert_in_delta(var_mean, @ia.variances_mean)
        assert_equal(@x1.mean, @ia.item_statistics['x1'][:mean])
        assert_equal(@x4.mean, @ia.item_statistics['x4'][:mean])
        assert_in_delta(@x1.sds, @ia.item_statistics['x1'][:sds],1e-14)
        assert_in_delta(@x4.sds, @ia.item_statistics['x4'][:sds],1e-14)
        # "If deleted" statistics must match an analysis of the dataset
        # without x1.
        ds2=@ds.clone
        ds2.delete_vector('x1')
        vector_sum=ds2.vector_sum
        assert_equal(vector_sum.mean, @ia.stats_if_deleted['x1'][:mean])
        assert_equal(vector_sum.sds, @ia.stats_if_deleted['x1'][:sds])
        assert_in_delta(vector_sum.variance, @ia.stats_if_deleted['x1'][:variance_sample],1e-10)

        assert_equal(Statsample::Reliability.cronbach_alpha(ds2), @ia.stats_if_deleted['x1'][:alpha])

        # Every off-diagonal cell of the covariance matrix.
        covariances=[]
        4.times.each {|i|
          4.times.each {|j|
            if i!=j
              covariances.push(@cov_matrix[i,j])
            end
          }
        }
        assert_in_delta(covariances.to_scale.mean, @ia.covariances_mean)
        assert_in_delta(0.999,@ia.item_total_correlation()['x1'],0.001)
        assert_in_delta(1050.455,@ia.stats_if_deleted()['x1'][:variance_sample],0.001)
      end
      should "return a summary" do
        assert(@ia.summary.size>0)
      end

    end
  end
end


================================================
FILE: test/test_reliability_icc.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))

# Global set to nil when this test file is loaded.
# NOTE(review): no reader or writer of this global is visible in this part of
# the file; confirm it is still needed before keeping it.
$reliability_icc=nil

# Tests for Statsample::Reliability::ICC.
#
# The first contexts check the statistics computed for a small fixture (four
# vectors of six values) against literal reference values. The optional
# "McGraw and Wong" context compares every ICC type with the values returned
# by the R package irr through Rserve; it is only defined when the rserve
# library can be loaded.
class StatsampleReliabilityIccTestCase < MiniTest::Test
  context Statsample::Reliability::ICC do
    setup do
      a=[9,6,8,7,10,6].to_scale
      b=[2,1,4,1,5,2].to_scale
      c=[5,3,6,2,6,4].to_scale
      d=[8,2,8,6,9,7].to_scale
      @ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
      @icc=Statsample::Reliability::ICC.new(@ds)
    end
    should "basic method be correct" do
      assert_equal(6,@icc.n)
      assert_equal(4,@icc.k)
    end
    should "total mean be correct" do
      assert_in_delta(5.291, @icc.total_mean, 0.001)
    end
    should "df methods be correct" do
      assert_equal(5,  @icc.df_bt)
      assert_equal(18, @icc.df_wt)
      assert_equal(3,  @icc.df_bj)
      assert_equal(15, @icc.df_residual)
    end
    should "ms between targets be correct" do
      assert_in_delta(11.24, @icc.ms_bt, 0.01)
    end
    should "ms within targets be correct" do
      assert_in_delta(6.26,  @icc.ms_wt, 0.01)
    end
    should "ms between judges be correct" do
      assert_in_delta(32.49, @icc.ms_bj, 0.01)
    end
    should "ms residual be correct" do
      assert_in_delta(1.02,  @icc.ms_residual, 0.01)
    end
    context "with McGraw and Wong denominations," do

    end
    context "with Shrout & Fleiss denominations, " do
      should "icc(1,1) method be correct" do
        assert_in_delta(0.17, @icc.icc_1_1, 0.01)
      end
      # Verified on SPSS and R
      should "icc(2,1) method be correct" do
        assert_in_delta(0.29, @icc.icc_2_1, 0.01)
      end
      should "icc(3,1) method be correct" do
        assert_in_delta(0.71, @icc.icc_3_1, 0.01)
      end
      should "icc(1,k) method be correct" do
        assert_in_delta(0.44, @icc.icc_1_k, 0.01)
      end
      # Verified on SPSS and R
      should "icc(2,k) method be correct" do
        assert_in_delta(0.62, @icc.icc_2_k, 0.01)
      end
      should "icc(3,k) method be correct" do
        assert_in_delta(0.91, @icc.icc_3_k, 0.01)
      end

      should "icc(1,1) F be correct" do
        assert_in_delta(1.795, @icc.icc_1_f.f)
      end
      should "icc(1,1) confidence interval should be correct" do
        assert_in_delta(-0.133, @icc.icc_1_1_ci[0], 0.001)
        assert_in_delta(0.723, @icc.icc_1_1_ci[1], 0.001)
      end
      should "icc(1,k) confidence interval should be correct" do
        assert_in_delta(-0.884, @icc.icc_1_k_ci[0], 0.001)
        assert_in_delta(0.912, @icc.icc_1_k_ci[1], 0.001)
      end

      should "icc(2,1) F be correct" do
        assert_in_delta(11.027, @icc.icc_2_f.f)
      end
      should "icc(2,1) confidence interval should be correct" do
        #skip("Not yet operational")
        assert_in_delta(0.019, @icc.icc_2_1_ci[0], 0.001)
        assert_in_delta(0.761, @icc.icc_2_1_ci[1], 0.001)
      end

      # Verified on SPSS and R
      should "icc(2,k) confidence interval should be correct" do
        #skip("Not yet operational")
        #p @icc.icc_2_k_ci
        assert_in_delta(0.039, @icc.icc_2_k_ci[0], 0.001)
        assert_in_delta(0.929, @icc.icc_2_k_ci[1], 0.001)

      end
      #should "Shrout icc(2,k) and McGraw icc(a,k) ci be equal" do
      #  assert_in_delta(@icc.icc_2_k_ci_shrout[0], @icc.icc_2_k_ci_mcgraw[0], 10e-5)
      #end

      should "icc(3,1) F be correct" do
        assert_in_delta(11.027, @icc.icc_3_f.f)
      end

      should "icc(3,1) confidence interval should be correct" do
        assert_in_delta(0.342, @icc.icc_3_1_ci[0], 0.001)
        assert_in_delta(0.946, @icc.icc_3_1_ci[1], 0.001)
      end
      should "icc(3,k) confidence interval should be correct" do
        assert_in_delta(0.676, @icc.icc_3_k_ci[0], 0.001)
        assert_in_delta(0.986, @icc.icc_3_k_ci[1], 0.001)
      end
      should "incorrect type raises an error" do
        assert_raise(::RuntimeError) do
          @icc.type=:nonexistant_type
        end
      end
    end

    # The comparison against R is optional. A missing library makes `require`
    # raise LoadError, which is a ScriptError and therefore is not handled by
    # a bare `rescue` (that only covers StandardError), so it is named here.
    begin
      require 'rserve'
      require 'statsample/rserve_extension'
      context "McGraw and Wong" do
        teardown do
          # The cache is still nil when setup failed before filling it.
          unless $reliability_icc.nil? || $reliability_icc[:r].nil?
            @r=$reliability_icc[:r].close
          end
        end
        setup do
          # The random dataset and the R results are built once and then
          # reused by every test through the $reliability_icc cache.
          if($reliability_icc.nil?)
            size=100
            a=size.times.map {rand(10)}.to_scale
            b=a.recode{|i|i+rand(4)-2}
            c=a.recode{|i|i+rand(4)-2}
            d=a.recode{|i|i+rand(4)-2}
            @ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset

            @icc=Statsample::Reliability::ICC.new(@ds)
            @r=Rserve::Connection.new

            @r.assign('ds',@ds)

            @r.void_eval("library(irr);
              iccs=list(
              icc_1=icc(ds,'o','c','s'),
              icc_k=icc(ds,'o','c','a'),
              icc_c_1=icc(ds,'t','c','s'),
              icc_c_k=icc(ds,'t','c','a'),
              icc_a_1=icc(ds,'t','a','s'),
              icc_a_k=icc(ds,'t','a','a'))
              ")
            @iccs=@r.eval('iccs').to_ruby
            $reliability_icc={ :icc=>@icc, :iccs=>@iccs, :r=>@r
            }

          end
          @icc=$reliability_icc[:icc]
          @iccs=$reliability_icc[:iccs]
          @r=$reliability_icc[:r]

        end
        # One context per ICC type; each test selects the type and compares
        # one field of the R result with the corresponding Ruby value.
        [:icc_1, :icc_k, :icc_c_1, :icc_c_k, :icc_a_1, :icc_a_k].each do |t|
          context "ICC Type #{t} " do
            should "value be correct" do
              @icc.type=t
              @r_icc=@iccs[t.to_s]
              assert_in_delta(@r_icc['value'],@icc.r)
            end
            should "fvalue be correct" do
              @icc.type=t
              @r_icc=@iccs[t.to_s]
              assert_in_delta(@r_icc['Fvalue'],@icc.f.f)
            end
            should "num df be correct" do
              @icc.type=t
              @r_icc=@iccs[t.to_s]
              assert_in_delta(@r_icc['df1'],@icc.f.df_num)
            end
            should "den df be correct" do
              @icc.type=t
              @r_icc=@iccs[t.to_s]
              assert_in_delta(@r_icc['df2'],@icc.f.df_den)
            end

            should "f probability be correct" do
              @icc.type=t
              @r_icc=@iccs[t.to_s]
              assert_in_delta(@r_icc['p.value'],@icc.f.probability)
            end
            should "bounds be equal" do
              @icc.type=t
              @r_icc=@iccs[t.to_s]
              assert_in_delta(@r_icc['lbound'],@icc.lbound)
              assert_in_delta(@r_icc['ubound'],@icc.ubound)
            end
            should "summary generated" do
              assert(@icc.summary.size>0)
            end
          end
        end
      end
    rescue LoadError
      puts "requires rserve"
    end

  end
end


================================================
FILE: test/test_reliability_skillscale.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))


# Tests for Statsample::Reliability::SkillScaleAnalysis.
#
# The fixture is a dataset of 20 random answers (options a-e) for five items
# plus an 'id' vector, together with an answer key. The expected "corrected"
# vectors (1 when the answer equals the key, 0 otherwise, nil kept as nil) are
# recomputed here and compared with the ones produced by the analysis.
class StatsampleReliabilitySkillScaleTestCase < MiniTest::Unit::TestCase
  context Statsample::Reliability::SkillScaleAnalysis do
    setup do
      options=%w{a b c d e}
      cases=20
      @id=cases.times.map {|v| v}.to_scale
      @a=cases.times.map {options[rand(5)]}.to_vector
      @b=cases.times.map {options[rand(5)]}.to_vector
      @c=cases.times.map {options[rand(5)]}.to_vector
      @d=cases.times.map {options[rand(5)]}.to_vector
      # The first answer of 'e' is always the first option. This used to be
      # written options[rand(0)]: rand(0) returns a Float in [0,1), which the
      # array index truncated to 0. Later answers are nil when rand()>0.8.
      @e=cases.times.map {|i|
        if i==0
          options.first
        else
          rand()>0.8 ? nil : options[rand(5)]
        end
      }.to_vector
      @ds={'id'=>@id,'a'=>@a,'b'=>@b,'c'=>@c,'d'=>@d,'e'=>@e}.to_dataset
      @key={'a'=>"a", 'b'=>options[rand(5)], 'c'=>options[rand(5)], 'd'=>options[rand(5)],'e'=>options[rand(5)]}
      @ssa=Statsample::Reliability::SkillScaleAnalysis.new(@ds, @key)
      @ac=@a.map {|v| v==@key['a'] ? 1 : 0}.to_scale
      @bc=@b.map {|v| v==@key['b'] ? 1 : 0}.to_scale
      @cc=@c.map {|v| v==@key['c'] ? 1 : 0}.to_scale
      @dc=@d.map {|v| v==@key['d'] ? 1 : 0}.to_scale
      # Missing answers stay missing instead of being scored as wrong.
      @ec=@e.map {|v| v.nil? ? nil : (v==@key['e'] ? 1 : 0)}.to_scale

    end
    should "return proper corrected dataset" do
      cds={'id'=>@id, 'a'=>@ac,'b'=>@bc,'c'=>@cc,'d'=>@dc, 'e'=>@ec}.to_dataset
      assert_equal(cds, @ssa.corrected_dataset)
    end
    should "return proper corrected minimal dataset" do
      cdsm={'a'=>@ac,'b'=>@bc,'c'=>@cc,'d'=>@dc, 'e'=>@ec}.to_dataset
      assert_equal(cdsm, @ssa.corrected_dataset_minimal)
    end
    should "return correct vector_sum and vector_mean" do
      cdsm=@ssa.corrected_dataset_minimal
      assert_equal(cdsm.vector_sum, @ssa.vector_sum)
      assert_equal(cdsm.vector_mean, @ssa.vector_mean)
    end
    # Fixed data kept as a regression case: building the summary must succeed.
    should "not crash on rare case" do
      a=Statsample::Vector["c","c","a","a","c","a","b","c","c","b","a","d","a","d","a","a","d","e","c","d"]
      b=Statsample::Vector["e","b","e","b","c","d","a","e","e","c","b","e","e","b","d","c","e","b","b","d"]
      c=Statsample::Vector["e","b","e","c","e","c","b","d","e","c","a","a","b","d","e","c","b","a","a","e"]
      d=Statsample::Vector["a","b","d","d","e","b","e","b","d","c","e","a","c","d","c","c","e","d","d","b"]
      e=Statsample::Vector["a","b",nil,"d","c","c","d",nil,"d","d","e","e",nil,nil,nil,"d","c",nil,"e","d"]
      key={"a"=>"a", "b"=>"e", "c"=>"d", "d"=>"c", "e"=>"d"}
      ds=Statsample::Dataset.new("a"=>a,"b"=>b,"c"=>c,"d"=>d,"e"=>e)
      ssa=Statsample::Reliability::SkillScaleAnalysis.new(ds, key)
      assert(ssa.summary)
    end

    should "return valid summary" do
      assert(@ssa.summary.size>0)
    end
  end
end


================================================
FILE: test/test_resample.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))

# Tests for the random sample helpers of Statsample::Resample.
class StatsampleResampleTestCase < MiniTest::Unit::TestCase
  def initialize(*args)
    super
  end

  # A generated sample has the requested size and stays inside the bounds.
  def test_basic
    sample = Statsample::Resample.generate(20, 1, 10)
    assert_equal(20, sample.size)
    assert(sample.min >= 1)
    assert(sample.max <= 10)
  end

  # 400 repetitions give 400 saved results; the number of results above 3 is
  # expected to fall between 30 and 70.
  def test_repeat_and_save
    results = Statsample::Resample.repeat_and_save(400) do
      Statsample::Resample.generate(20, 1, 10).count(1)
    end
    assert_equal(400, results.size)
    vector = Statsample::Vector.new(results, :scale)
    above_three = vector.count { |x| x > 3 }
    assert(above_three >= 30 && above_three <= 70)
  end
end


================================================
FILE: test/test_rserve_extension.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
# The test case needs the rserve library; when it cannot be loaded the
# LoadError is rescued and only a notice is printed.
begin
  require 'rserve'
  require 'statsample/rserve_extension'

  # Checks the to_REXP conversions of vectors and datasets, both locally and
  # after a round trip through an Rserve connection.
  class StatsampleRserveExtensionTestCase < MiniTest::Unit::TestCase
    context "Statsample Rserve extensions" do
      setup do
        @r = Rserve::Connection.new
      end

      teardown do
        @r.close
      end

      should "return a valid rexp for numeric vector" do
        vector = 100.times.map { |i| rand() > 0.9 ? nil : i + rand() }.to_scale
        rexp = vector.to_REXP
        assert(rexp.is_a?(Rserve::REXP::Double))
        assert_equal(rexp.to_ruby, vector.data_with_nils)
        @r.assign('a', rexp)
        assert_equal(vector.data_with_nils, @r.eval('a').to_ruby)
      end

      should "return a valid rserve dataframe for statsample datasets" do
        a = 100.times.map { |i| rand() > 0.9 ? nil : i + rand() }.to_scale
        b = 100.times.map { |i| rand() > 0.9 ? nil : i + rand() }.to_scale
        c = 100.times.map { |i| rand() > 0.9 ? nil : i + rand() }.to_scale
        dataset = {'a'=>a, 'b'=>b, 'c'=>c}.to_dataset
        rexp = dataset.to_REXP
        assert(rexp.is_a?(Rserve::REXP::GenericVector))
        converted = rexp.to_ruby
        assert_equal(a.data_with_nils, converted['a'])
        @r.assign('df', rexp)
        out_df = @r.eval('df').to_ruby
        assert_equal('data.frame', out_df.attributes['class'])
        assert_equal(['a','b','c'], out_df.attributes['names'])
        assert_equal(a.data_with_nils, out_df['a'])
      end
    end
  end

rescue LoadError
  puts "Require rserve extension"
end


================================================
FILE: test/test_srs.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))

# Tests for the Statsample::SRS estimators against literal reference values.
class StatsampleSrsTestCase < MiniTest::Unit::TestCase
  # Sample size estimations are compared after truncating to an integer.
  def test_std_error
    n0 = Statsample::SRS.estimation_n0(0.05, 0.5, 0.95)
    assert_equal(384, n0.to_i)

    n = Statsample::SRS.estimation_n(0.05, 0.5, 150, 0.95)
    assert_equal(108, n.to_i)

    sd = Statsample::SRS.proportion_sd_kp_wor(0.5, 100, 150)
    assert_in_delta(0.0289, sd, 0.001)
  end
end


================================================
FILE: test/test_statistics.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
# Miscellaneous tests: p values from a cdf, Array#recode_repeated,
# String#is_number?, SRS proportion estimators and simple linear regression.
class StatsampleStatisicsTestCase < MiniTest::Unit::TestCase

  def initialize(*args)
    super
  end

  # The tail is a positional argument. The earlier `tails=:left` form inside
  # the call was a local assignment, not a keyword argument, and only left an
  # unused local behind.
  def test_p_using_cdf
    assert_equal(0.25, Statsample::Test.p_using_cdf(0.25, :left))
    assert_equal(0.75, Statsample::Test.p_using_cdf(0.25, :right))
    assert_equal(0.50, Statsample::Test.p_using_cdf(0.25, :both))
    assert_equal(1, Statsample::Test.p_using_cdf(0.50, :both))
    assert_equal(0.05, Statsample::Test.p_using_cdf(0.025, :both))
    assert_in_delta(0.05, Statsample::Test.p_using_cdf(0.975, :both),0.0001)

  end

  # Repeated values get a numeric suffix; unique values are left untouched.
  def test_recode_repeated
    a=%w{a b c c d d d e}
    exp=["a","b","c_1","c_2","d_1","d_2","d_3","e"]
    assert_equal(exp,a.recode_repeated)
  end

  def test_is_number
    assert("10".is_number?)
    assert("-10".is_number?)
    assert("0.1".is_number?)
    assert("-0.1".is_number?)
    assert("10e3".is_number?)
    assert("10e-3".is_number?)
    assert(!"1212-1212-1".is_number?)
    assert(!"a10".is_number?)
    assert(!"".is_number?)

  end

  # Only the size and sum of the fixture are checked; the confidence interval
  # call is kept commented out as in the original test.
  def test_estimation_mean
    v=([42]*23+[41]*4+[36]*1+[32]*1+[29]*1+[27]*2+[23]*1+[19]*1+[16]*2+[15]*2+[14,11,10,9,7]+ [6]*3+[5]*2+[4,3]).to_vector(:scale)
    assert_equal(50,v.size)
    assert_equal(1471,v.sum())
    #limits=Statsample::SRS.mean_confidence_interval_z(v.mean(), v.sds(), v.size,676,0.80)
  end

  def test_estimation_proportion
    # total
    pop=3042
    sam=200
    prop=0.19
    assert_in_delta(81.8, Statsample::SRS.proportion_total_sd_ep_wor(prop, sam, pop), 0.1)

    # confidence limits
    pop=500
    sam=100
    prop=0.37
    a=0.95
    l= Statsample::SRS.proportion_confidence_interval_z(prop, sam, pop, a)
    assert_in_delta(0.28,l[0],0.01)
    assert_in_delta(0.46,l[1],0.01)
  end

  # Placeholder with no assertions: the check below was never enabled.
  def test_ml
    #real=[1,1,1,1].to_vector(:scale)

    #pred=[0.0001,0.0001,0.0001,0.0001].to_vector(:scale)
    # puts  Statsample::Bivariate.maximum_likehood_dichotomic(pred,real)
  end


  # Sums of squares must add up (ssr + sse = sst) and r must agree with the
  # Pearson correlation of the two vectors.
  def test_simple_linear_regression
    a=[1,2,3,4,5,6].to_vector(:scale)
    b=[6,2,4,10,12,8].to_vector(:scale)
    reg = Statsample::Regression::Simple.new_from_vectors(a,b)
    assert_in_delta((reg.ssr+reg.sse).to_f,reg.sst,0.001)
    assert_in_delta(Statsample::Bivariate.pearson(a,b),reg.r,0.001)
    assert_in_delta(2.4,reg.a,0.01)
    assert_in_delta(1.314,reg.b,0.001)
    assert_in_delta(0.657,reg.r,0.001)
    assert_in_delta(0.432,reg.r2,0.001)
  end
end


================================================
FILE: test/test_stest.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))

# Tests for the helpers in Statsample::Test: chi square, Mann-Whitney U and
# the Levene test.
class StatsampleTestTestCase < MiniTest::Unit::TestCase
  # Chi square computed from an observed and an explicit expected matrix.
  def test_chi_square_matrix_with_expected
    observed = Matrix[[95,95],[45,155]]
    expected = Matrix[[68,122],[72,128]]
    assert_nothing_raised { Statsample::Test.chi_square(observed, expected) }
    statistic = Statsample::Test.chi_square(observed, expected).chi_square
    assert_in_delta(32.53, statistic, 0.1)
  end

  # Chi square computed from the observed matrix alone.
  def test_chi_square_matrix_only_observed
    observed = Matrix[[20,30,40],[30,40,50],[60,70,80],[10,20,40]]
    assert_nothing_raised { Statsample::Test.chi_square(observed) }
    result = Statsample::Test.chi_square(observed)
    assert_in_delta(9.5602, result.chi_square, 0.0001)
    assert_in_delta(0.1444, result.probability, 0.0001)

    assert_equal(6, result.df)
  end

  # U must be the same regardless of the order of the two samples.
  def test_u_mannwhitney
    first  = [1,2,3,4,5,6].to_scale
    second = [0,5,7,9,10,11].to_scale
    assert_equal(7.5, Statsample::Test.u_mannwhitney(first, second).u)
    assert_equal(7.5, Statsample::Test.u_mannwhitney(second, first).u)

    first  = [1, 7,8,9,10,11].to_scale
    second = [2,3,4,5,6,12].to_scale
    assert_equal(11, Statsample::Test.u_mannwhitney(first, second).u)
  end

  # Levene test built from an array of vectors.
  def test_levene
    low  = [1,2,3,4,5,6,7,8,100,10].to_scale
    high = [30,40,50,60,70,80,90,100,110,120].to_scale
    assert_levene(Statsample::Test::Levene.new([low, high]))
  end

  # Levene test built from a dataset holding the same two vectors.
  def test_levene_dataset
    low  = [1,2,3,4,5,6,7,8,100,10].to_scale
    high = [30,40,50,60,70,80,90,100,110,120].to_scale
    dataset = {'a'=>low, 'b'=>high}.to_dataset
    assert_levene(Statsample::Test::Levene.new(dataset))
  end

  # Shared expectations for both Levene tests.
  def assert_levene(levene)
    assert_in_delta(0.778, levene.f, 0.001)
    assert_in_delta(0.389, levene.probability, 0.001)
  end

end


================================================
FILE: test/test_stratified.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))

# Tests for Statsample::StratifiedSample.
class StatsampleStratifiedTestCase < MiniTest::Unit::TestCase

  def initialize(*args)
    super
  end

  # The stratified mean of two strata must equal the mean of the vector
  # built from both strata joined together.
  def test_mean
    first_stratum  = [10,20,30,40,50]
    second_stratum = [110,120,130,140]
    population = first_stratum + second_stratum
    first_vector  = first_stratum.to_vector(:scale)
    second_vector = second_stratum.to_vector(:scale)
    population_vector = population.to_vector(:scale)
    assert_equal(population_vector.mean,
                 Statsample::StratifiedSample.mean(first_vector, second_vector))
  end
end


================================================
FILE: test/test_test_f.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
# Tests for Statsample::Test::F built from two sums of squares and their
# degrees of freedom.
class StatsampleTestFTestCase < MiniTest::Unit::TestCase
  context(Statsample::Test::F) do
    setup do
      @ssb = 84
      @ssw = 68
      @df_num = 2
      @df_den = 15
      @f = Statsample::Test::F.new(@ssb.quo(@df_num), @ssw.quo(@df_den), @df_num, @df_den)
    end

    should "have #f equal to msb/msw" do
      expected = (@ssb.quo(@df_num)).quo(@ssw.quo(@df_den))
      assert_equal(expected, @f.f)
    end

    should "have df total equal to df_num+df_den" do
      assert_equal(@df_num + @df_den, @f.df_total)
    end

    should "have probability near 0.002" do
      assert_in_delta(0.002, @f.probability, 0.0005)
    end

    should "be coerced into float" do
      assert_equal(@f.to_f, @f.f)
    end

    context("method summary") do
      setup do
        @summary = @f.summary
      end

      should "have size > 0" do
        assert(@summary.size > 0)
      end
    end
  end
end


================================================
FILE: test/test_test_kolmogorovsmirnov.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
# Tests for Statsample::Test::KolmogorovSmirnov and its empirical
# distribution helper.
class StatsampleTestKolmogorovSmirnovTestCase < MiniTest::Unit::TestCase
  context(Statsample::Test::KolmogorovSmirnov) do
    should "calculate correctly D for two given samples" do
      first  = [1.1,2.5,5.6,9]
      second = [1,2.3,5.8,10]
      test = Statsample::Test::KolmogorovSmirnov.new(first, second)
      assert_equal(0.25, test.d)
    end

    should "calculate correctly D for a normal sample and Normal Distribution" do
      sample = [0.30022510,-0.36664035,0.08593404,1.29881130,-0.49878633,-0.63056010, 0.28397638, -0.04913700,0.03566644,-1.33414346]
      test = Statsample::Test::KolmogorovSmirnov.new(sample, Distribution::Normal)
      assert_in_delta(0.282, test.d, 0.001)
    end

    # Uses 100 values drawn from the generator returned by
    # Distribution::Normal.rng, so only an upper bound on D is asserted.
    should "calculate correctly D for a variable normal and Normal Distribution" do
      generator = Distribution::Normal.rng
      sample = 100.times.map { generator.call }
      test = Statsample::Test::KolmogorovSmirnov.new(sample, Distribution::Normal)
      assert(test.d < 0.15)
    end

    context(Statsample::Test::KolmogorovSmirnov::EmpiricDistribution) do
      should "Create a correct empirical distribution for an array" do
        values = [10,9,8,7,6,5,4,3,2,1]
        distribution = Statsample::Test::KolmogorovSmirnov::EmpiricDistribution.new(values)
        # Pairs of [expected cdf, point], checked in this order.
        [[0, -2], [0.5, 5], [0.5, 5.5], [0.9, 9], [1, 11]].each do |expected, point|
          assert_equal(expected, distribution.cdf(point))
        end
      end
    end
  end
end


================================================
FILE: test/test_test_t.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
# Tests for the t tests in Statsample::Test::T: the plain statistic, the one
# sample test and the two independent samples test.
class StatsampleTestTTestCase < MiniTest::Unit::TestCase
  include Statsample::Test
  include Math

  context T do
    setup do
      @a = [30.02, 29.99, 30.11, 29.97, 30.01, 29.99].to_scale
      @b = [29.89, 29.93, 29.72, 29.98, 30.02, 29.98].to_scale
      @x1 = @a.mean
      @x2 = @b.mean
      @s1 = @a.sd
      @s2 = @b.sd
      @n1 = @a.n
      @n2 = @b.n
    end

    should "calculate correctly standard t" do
      test = Statsample::Test::T.new(@x1, @s1.quo(Math.sqrt(@a.n)), @a.n-1)
      assert_equal((@x1).quo(@s1.quo(Math.sqrt(@a.n))), test.t)
      assert_equal(@a.n-1, test.df)
      assert(test.summary.size > 0)
    end

    # The one sample test is run on the difference of two paired vectors.
    should "calculate correctly t for one sample" do
      before = [6, 4, 6, 7, 4,5,5,12,6,1].to_scale
      after  = [9, 6, 5,10,10,8,7,10,6,5].to_scale
      difference = before - after
      test = Statsample::Test::T::OneSample.new(difference)
      assert_in_delta(-2.631, test.t, 0.001)
      assert_in_delta( 0.027, test.probability, 0.001)
      assert_in_delta( 0.76012, test.se, 0.0001)
      assert(test.summary.size > 0)
    end

    should "calculate correctly t for two samples" do
      assert_in_delta(1.959, T.two_sample_independent(@x1, @x2, @s1, @s2, @n1, @n2), 0.001)
      assert_in_delta(1.959, T.two_sample_independent(@x1, @x2, @s1, @s2, @n1, @n2, true), 0.001)
    end

    should "calculate correctly df for equal and unequal variance" do
      assert_equal(10, T.df_equal_variance(@n1, @n2))
      assert_in_delta(7.03, T.df_not_equal_variance(@s1, @s2, @n1, @n2), 0.001)
    end

    should "calculate all values for T object" do
      test = Statsample::Test.t_two_samples_independent(@a, @b)
      assert(test.summary.size > 0)
      assert_in_delta(1.959, test.t_equal_variance, 0.001)
      assert_in_delta(1.959, test.t_not_equal_variance, 0.001)
      assert_in_delta(10, test.df_equal_variance, 0.001)
      assert_in_delta(7.03, test.df_not_equal_variance, 0.001)
      assert_in_delta(0.07856, test.probability_equal_variance, 0.001)
      assert_in_delta(0.09095, test.probability_not_equal_variance, 0.001)
    end

    should "be the same using shorthand" do
      vector = 100.times.map { rand(100) }.to_scale
      assert_equal(Statsample::Test.t_one_sample(vector).t, T::OneSample.new(vector).t)
    end

    # The reference mean u is the sample mean shifted by a random offset.
    should "calculate all values for one sample T test" do
      u = @a.mean+(1-rand*2)
      one_sample = T::OneSample.new(@a, {:u=>u})
      assert_equal((@a.mean-u).quo(@a.sd.quo(sqrt(@a.n))), one_sample.t)
      assert_equal(@a.n-1, one_sample.df)
      assert(one_sample.summary.size > 0)
    end
  end
end


================================================
FILE: test/test_umannwhitney.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))

# Tests for Statsample::Test::UMannWhitney against literal reference values.
class StatsampleUMannWhitneyTestCase < MiniTest::Unit::TestCase
  include Statsample::Test

  context Statsample::Test::UMannWhitney do
    setup do
      @v1 = [1,2,3,4,7,8,9,10,14,15].to_scale
      @v2 = [5,6,11,12,13,16,17,18,19].to_scale
      @u = Statsample::Test::UMannWhitney.new(@v1, @v2)
    end

    should "have same result using class or Test#u_mannwhitney" do
      shorthand = Statsample::Test.u_mannwhitney(@v1, @v2)
      assert_equal(shorthand.u, @u.u)
    end

    should "have correct U values" do
      assert_equal(73, @u.r1)
      assert_equal(117, @u.r2)
      assert_equal(18, @u.u)
    end

    should "have correct value for z" do
      assert_in_delta(-2.205, @u.z, 0.001)
    end

    should "have correct value for z and exact probability" do
      assert_in_delta(0.027, @u.probability_z, 0.001)
      assert_in_delta(0.028, @u.probability_exact, 0.001)
    end
  end
end


================================================
FILE: test/test_vector.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))

class StatsampleTestVector < MiniTest::Unit::TestCase
  include Statsample::Shorthand

  # Builds the nominal fixture vector @c, which contains a nil and two -99
  # codes; -99 is declared as a missing value.
  def setup
    values = [5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99]
    @c = Statsample::Vector.new(values, :nominal)
    @c.name = "Test Vector"
    @c.missing_values = [-99]
  end
  # Checks, token by token, the vectors stored in +b+ (indexed by token)
  # against the expected per-case values.
  def assert_counting_tokens(b)
    expectations = [
      ['a', [1,1,0,1,0,nil]],
      ['b', [0,1,0,0,0,nil]],
      ['c', [0,0,1,0,0,nil]],
      ['d', [0,0,1,1,0,nil]],
      [10,  [0,0,0,0,1,nil]]
    ]
    expectations.each do |token, flags|
      assert_equal(flags, b[token].to_a)
    end
  end
  # Tests for Statsample.only_valid and Statsample.only_valid_clone using two
  # random vectors with scattered nils.
  context Statsample do
    setup do
      @sample = 100
      @a = @sample.times.map { |i| (i+rand(10)) %10 ==0 ? nil : rand(100) }.to_scale
      @b = @sample.times.map { |i| (i+rand(10)) %10 ==0 ? nil : rand(100) }.to_scale

      # Keep only the positions where both vectors have a value.
      kept_a = Array.new
      kept_b = Array.new
      @a.each_with_index do |v, i|
        next if @a[i].nil? || @b[i].nil?
        kept_a.push(@a[i])
        kept_b.push(@b[i])
      end
      @correct_a = kept_a.to_scale
      @correct_b = kept_b.to_scale

      # Assertions shared by both tests below.
      @common = lambda do |av, bv|
        assert_equal(@correct_a, av, "A no es esperado")
        assert_equal(@correct_b, bv, "B no es esperado")
        assert(!av.has_missing_data?, "A tiene datos faltantes")
        assert(!bv.has_missing_data?, "b tiene datos faltantes")
      end
    end

    # only_valid returns new objects even when nothing has to be removed.
    should "return correct only_valid" do
      av, bv = Statsample.only_valid(@a, @b)
      av2, bv2 = Statsample.only_valid(av, bv)
      @common.call(av, bv)
      assert_equal(av, av2)
      assert_not_same(av, av2)
      assert_not_same(bv, bv2)
    end

    # only_valid_clone returns the very same objects when nothing is removed.
    should "return correct only_valid_clone" do
      av, bv = Statsample.only_valid_clone(@a, @b)
      @common.call(av, bv)
      av2, bv2 = Statsample.only_valid_clone(av, bv)
      assert_equal(av, av2)
      assert_same(av, av2)
      assert_same(bv, bv2)
    end
  end
  context Statsample::Vector do
    # Nominal fixture vector @c with a nil and two -99 codes declared missing.
    setup do
      values = [5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99]
      @c = Statsample::Vector.new(values, :nominal)
      @c.name = "Test Vector"
      @c.missing_values = [-99]
    end
    # A vector built from a GSL::Vector keeps its values and is not flawed.
    should_with_gsl "be created with GSL::Vector" do
      source = GSL::Vector[1,2,3,4,5]
      vector = Statsample::Vector.new(source)
      assert_equal([1,2,3,4,5], vector.to_a)
      refute(vector.flawed?)
    end

    # Conversion of vectors to one-row and one-column matrices.
    context "using matrix operations" do
      setup do
        @a = [1,2,3,4,5].to_scale
      end

      should "to_matrix returns a matrix with 1 row" do
        horizontal = Matrix[[1,2,3,4,5]]
        assert_equal(horizontal, @a.to_matrix)
      end

      should "to_matrix(:vertical) returns a matrix with 1 column" do
        vertical = Matrix.columns([[1,2,3,4,5]])
        assert_equal(vertical, @a.to_matrix(:vertical))
      end

      should "returns valid submatrixes" do
        # 3*4 + 2*5 = 22
        row    = [3,2].to_vector(:scale)
        column = [4,5].to_vector(:scale)
        product = row.to_matrix * column.to_matrix(:vertical)
        assert_equal(22, product[0,0])
      end
    end
    # The different ways of building a vector must give equal results.
    context "when initializing" do
      setup do
        # Ten random integers followed by a nil.
        @data = (10.times.map { rand(100) }) + [nil]
        @original = Statsample::Vector.new(@data, :scale)
      end

      should "be the sample using []" do
        built = Statsample::Vector[*@data]
        assert_equal(@original, built)
      end

      # Scalars, ranges, nested arrays and vectors are all accepted by [].
      should "[] returns same results as R-c()" do
        reference = [0,4,5,6,10].to_scale
        assert_equal(reference, Statsample::Vector[0,4,5,6,10])
        assert_equal(reference, Statsample::Vector[0,4..6,10])
        assert_equal(reference, Statsample::Vector[[0],[4,5,6],[10]])
        assert_equal(reference, Statsample::Vector[[0],[4,[5,[6]]],[10]])

        assert_equal(reference, Statsample::Vector[[0],[4,5,6].to_vector,[10]])
      end

      should "be the same usign #to_vector" do
        built = @data.to_vector(:scale)
        assert_equal(@original, built)
      end

      should "be the same using #to_scale" do
        built = @data.to_scale
        assert_equal(@original, built)
        assert_equal(:scale, built.type)
        assert_equal(@data.reject { |v| v.nil? }, built.valid_data)
      end

      should "could use new_scale with size only" do
        expected = 10.times.map { nil }.to_scale
        built = Statsample::Vector.new_scale(10)
        assert_equal(expected, built)
      end

      should "could use new_scale with size and value" do
        value = rand
        expected = 10.times.map { value }.to_scale
        built = Statsample::Vector.new_scale(10, value)
        assert_equal(expected, built)
      end

      should "could use new_scale with func" do
        expected = 10.times.map { |i| i*2 }.to_scale
        built = Statsample::Vector.new_scale(10) { |i| i*2 }
        assert_equal(expected, built)
      end
    end

    # Splitting a nominal vector of separator-joined tokens into one vector
    # per token.
    context "#split_by_separator" do
      setup do
        @a = Statsample::Vector.new(["a","a,b","c,d","a,d",10,nil], :nominal)
        @b = @a.split_by_separator(",")
      end

      should "returns a Hash" do
        assert_kind_of(Hash, @b)
      end

      should "return a Hash with keys with different values of @a" do
        tokens = ['a','b','c','d',10]
        assert_equal(tokens, @b.keys)
      end

      should "returns a Hash, which values are Statsample::Vector" do
        @b.each_key do |token|
          assert_instance_of(Statsample::Vector, @b[token])
        end
      end

      should "hash values are n times the tokens appears" do
        assert_counting_tokens(@b)
      end

      should "#split_by_separator_freq returns the number of ocurrences of tokens" do
        frequencies = {'a'=>3,'b'=>1,'c'=>1,'d'=>2,10=>1}
        assert_equal(frequencies, @a.split_by_separator_freq())
      end

      should "using a different separator give the same values" do
        starred = Statsample::Vector.new(["a","a*b","c*d","a*d",10,nil], :nominal)
        assert_counting_tokens(starred.split_by_separator("*"))
      end
    end
    # Reference value for a small fixed sample.
    should "return correct median_absolute_deviation" do
      sample = [1, 1, 2, 2, 4, 6, 9].to_scale
      assert_equal(1, sample.median_absolute_deviation)
    end
    # Values 0..9 in two bins: five values per bin, range starts at i*4.5.
    should "return correct histogram" do
      sample = 10.times.map { |v| v }.to_scale
      histogram = sample.histogram(2)
      assert_equal([5,5], histogram.bin)
      (0...3).each do |i|
        assert_in_delta(i*4.5, histogram.get_range(i)[0], 1e-9)
      end
    end
    # The name assigned in setup must be readable back. The previous body was
    # a bare == comparison whose result was discarded, so nothing was checked.
    should "have a name" do
      assert_equal("Test Vector", @c.name)
    end
    # Two vectors created one after the other get consecutive default names.
    should "without explicit name, returns vector with succesive numbers" do
      first  = 10.times.map { rand(100) }.to_scale
      second = 10.times.map { rand(100) }.to_scale
      assert_match(/Vector \d+/, first.name)
      following = first.name[/Vector (\d+)/, 1].to_i + 1
      assert_equal("Vector #{following}", second.name)
    end
    # Round trip through a temporary file must give back an equal vector.
    should "save to a file and load the same Vector" do
      tempfile = Tempfile.new("vector.vec")
      @c.save(tempfile.path)
      loaded = Statsample.load(tempfile.path)
      assert_equal(@c, loaded)
    end
    # collect with an identity block yields the raw data, missing included.
    should "#collect returns an array" do
      collected = @c.collect { |value| value }
      assert_equal(collected, [5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99])
    end

    # recode returns a new vector; recode! changes the receiver in place.
    should "#recode returns a recoded array" do
      flags = @c.recode { |v| @c.is_valid?(v) ? 0 : 1 }
      expected = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1].to_vector
      assert_equal(expected, flags)
      expected.recode! { |v| v==0 ? 1:0 }
      inverted = (([1]*15)+([0]*3)).to_vector
      assert_equal(inverted, expected)
    end
    # 1*2*3*4*5 = 120
    should "#product returns the * of all values" do
      factors = [1,2,3,4,5].to_vector(:scale)
      assert_equal(120, factors.product)
    end

    # Changing missing_values changes both valid_data and data_with_nils.
    should "missing values" do
      # 10 is missing, -99 is valid.
      @c.missing_values = [10]
      assert_equal([-99,-99,1,2,3,4,5,5,5,5,5,6,6,7,8,9], @c.valid_data.sort)
      assert_equal([5,5,5,5,5,6,6,7,8,9,nil,1,2,3,4,nil,-99,-99], @c.data_with_nils)

      # -99 is missing, 10 is valid.
      @c.missing_values = [-99]
      assert_equal(@c.valid_data.sort, [1,2,3,4,5,5,5,5,5,6,6,7,8,9,10])
      assert_equal(@c.data_with_nils, [5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,nil,nil])

      # No explicit missing values: only nil is missing.
      @c.missing_values = []
      assert_equal(@c.valid_data.sort, [-99,-99,1,2,3,4,5,5,5,5,5,6,6,7,8,9,10])
      assert_equal(@c.data_with_nils, [5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99])
    end
    # A nil in the data counts as missing.
    should "correct has_missing_data? with missing data" do
      with_nil = [1,2,3,nil].to_vector
      assert(with_nil.has_missing_data?)
    end
    # No nil and no declared missing values: nothing is missing.
    should "correct has_missing_data? without missing data" do
      complete = [1,2,3,4,10].to_vector
      assert(!complete.has_missing_data?)
    end
    # A value declared through missing_values counts as missing.
    should "with explicit missing_values, should respond has_missing_data?" do
      vector = [1,2,3,4,10].to_vector
      vector.missing_values = [10]
      assert(vector.has_missing_data?)
    end
    # Only values that have a label are replaced by it.
    should "label correctly fields" do
      @c.labels = {5=>'FIVE'}
      labeled = ["FIVE","FIVE","FIVE","FIVE","FIVE",6,6,7,8,9,10,1,2,3,4,nil,-99, -99]
      assert_equal(labeled, @c.vector_labeled.to_a)
    end
    should "verify" do
      # The expected hash maps positions 15..17 to the values that do not
      # satisfy the block (nil and the two -99 entries).
      failing=@c.verify {|value| !value.nil? && value>0 }
      assert_equal({15=>nil,16=>-99,17=>-99},failing)
    end
    should "have a summary with name on it" do
      # The vector name is used as the (unescaped) pattern, as before.
      name_pattern=Regexp.new(@c.name)
      assert_match(name_pattern, @c.summary)
    end

    should "GSL::Vector based should push correcty" do
      # Nothing to check when the GSL bindings are not available.
      skip("Requires GSL") unless Statsample.has_gsl?
      scale=GSL::Vector[1,2,3,4,5].to_scale
      scale.push(nil)
      # Pushing nil must be reflected in the data and flag the vector.
      assert_equal([1,2,3,4,5,nil], scale.to_a)
      assert(scale.flawed?)
    end


    should "split correctly" do
      # Comma separated strings become arrays; 10 is wrapped, nil stays nil.
      raw=["a","a,b","c,d","a,d","d",10,nil]
      nominal = Statsample::Vector.new(raw,:nominal)
      expected=[["a"],["a","b"],["c","d"],["a","d"],["d"],[10],nil]
      assert_equal(expected, nominal.splitted)
    end
    should "multiply correct for scalar" do
      base = [1,2,3].to_scale
      scaled = [5,10,15].to_scale
      assert_equal(scaled, base*5)
    end
    should "multiply correct with other vector" do
      # Element-wise product of two equally sized vectors.
      left = [1,2,3].to_scale
      right = [2,4,6].to_scale
      products = [2,8,18].to_scale
      assert_equal(products, left*right)
    end
    should "sum correct for scalar" do
      base = [1,2,3].to_scale
      shifted = [11,12,13].to_scale
      assert_equal(shifted, base+10)
    end

    should "raise NoMethodError when method requires ordinal and vector is nominal" do
      @c.type=:nominal
      assert_raise(::NoMethodError) do
        @c.median
      end
    end

    should "raise NoMethodError when method requires scalar and vector is ordinal" do
      @c.type=:ordinal
      assert_raise(::NoMethodError) do
        @c.mean
      end
    end
    should "jacknife correctly with named method" do
      # First example
      sample=[1,2,3,4].to_scale
      # A single estimator given as a symbol.
      single=sample.jacknife(:mean)
      assert_equal(sample.mean, single[:mean].mean)
      # Several estimators given as an array.
      several=sample.jacknife([:mean,:sd])
      assert_equal(sample.mean, several[:mean].mean)
      assert_equal(sample.sd, several[:mean].sd)
    end
    should "jacknife correctly with custom method" do
      # Second example
      sample=[17.23, 18.71,13.93,18.81,15.78,11.29,14.91,13.39, 18.21, 11.57, 14.28, 10.94, 18.83, 15.52,13.45,15.25].to_scale
      # The estimator is supplied as a name=>lambda pair.
      log_variance=lambda {|v|  Math.log(v.variance) }
      result=sample.jacknife(:log_s2=>log_variance)
      expected=[1.605, 2.972, 1.151, 3.097, 0.998, 3.308, 0.942, 1.393, 2.416, 2.951, 1.043, 3.806, 3.122, 0.958, 1.362, 0.937].to_scale

      pseudo_values=result[:log_s2]
      assert_similar_vector(expected, pseudo_values, 0.001)
      assert_in_delta(2.00389, pseudo_values.mean, 0.00001)
      assert_in_delta(1.091, pseudo_values.variance, 0.001)
    end
    should "jacknife correctly with k>1" do
      sample=rnorm(6)
      result=sample.jacknife(:mean,2)
      mean=sample.mean
      # Expected value for one group, computed from the four observations
      # kept when a block of two is dropped.
      pseudo=lambda {|kept| 3*mean-2*(sample[kept[0]]+sample[kept[1]]+sample[kept[2]]+sample[kept[3]]) / 4 }
      expected=[pseudo.call([2,3,4,5]), pseudo.call([0,1,4,5]), pseudo.call([0,1,2,3])].to_scale
      assert_similar_vector(expected, result[:mean], 1e-13)
    end
    should "bootstrap should return a vector with mean=mu and sd=se" do
      sample=rnorm(100)
      replicates=sample.bootstrap([:mean,:sd],200)
      # Value the test compares against the sd of the bootstrapped means.
      standard_error=1/Math.sqrt(sample.size)
      boot_means=replicates[:mean]
      assert_in_delta(0, boot_means.mean, 0.3)
      assert_in_delta(standard_error, boot_means.sd, 0.02)
    end


  end


  # Checks the nominal-level descriptives of the fixture vector @c:
  # element access, frequencies, proportions, factors, mode and n_valid.
  # All assertions are written expected-first so that failure messages
  # label the expected and actual values correctly.
  def test_nominal
    assert_equal(5,@c[1])
    assert_equal({ 1=>1,2=>1,3=>1,4=>1,5=>5,6=>2,7=>1,8=>1, 9=>1,10=>1},@c.frequencies)
    assert_equal({ 1=>1,2=>1,3=>1,4=>1,5=>5,6=>2,7=>1,8=>1, 9=>1,10=>1},@c._frequencies)
    assert_equal({ 1 => 1.quo(15) ,2=>1.quo(15), 3=>1.quo(15),4=>1.quo(15),5=>5.quo(15),6=>2.quo(15),7=>1.quo(15), 8=>1.quo(15), 9=>1.quo(15),10=>1.quo(15)}, @c.proportions)
    assert_equal(1.quo(15), @c.proportion)
    assert_equal(1.quo(15), @c.proportion(2))
    assert_equal([1,2,3,4,5,6,7,8,9,10], @c.factors.sort)
    assert_equal(5,@c.mode)
    assert_equal(15,@c.n_valid)
  end
  # Vector equality: same data and type are equal; a different type, a plain
  # Array or an arbitrary object are not.
  def test_equality
    first=[1,2,3].to_vector
    second=[1,2,3].to_vector
    assert_equal(first,second)
    # Same data, different measurement level.
    nominal=[1,2,3].to_vector(:nominal)
    ordinal=[1,2,3].to_vector(:ordinal)
    assert_not_equal(nominal,ordinal)
    # A vector never equals a bare Array.
    plain=[1,2,3]
    assert_not_equal(nominal,plain)
    first=[1,2,3].to_vector()
    second=[1,2,3].to_vector()
    assert_equal(first,second)
    assert_equal(false, first == Object.new)
  end
  # vector_percentil on a complete vector and on one containing nils.
  def test_vector_percentil
    complete=[1,2,2,3,4,5,5,5,6,10].to_scale
    complete_expected=[10,25,25,40,50,70,70,70,90,100].to_scale
    assert_equal(complete_expected, complete.vector_percentil)
    # nil entries are expected to stay nil at the same positions.
    sparse=[1,nil,nil,2,2,3,4,nil,nil,5,5,5,6,10].to_scale
    sparse_expected=[10,nil,nil,25,25,40,50,nil,nil,70,70,70,90,100].to_scale
    assert_equal(sparse_expected, sparse.vector_percentil)
  end
  # Median and percentiles, first on the fixture treated as ordinal and then
  # on two literal scale vectors.
  def test_ordinal
    @c.type=:ordinal
    assert_equal(5,@c.median)
    {25=>4, 75=>7}.each do |pct,expected|
      assert_equal(expected,@c.percentil(pct))
    end

    prices=[200000, 200000, 210000, 220000, 230000, 250000, 250000, 250000, 270000, 300000, 450000, 130000, 140000, 140000, 140000, 145000, 148000, 165000, 170000, 180000, 180000, 180000, 180000, 180000, 180000 ].to_scale
    assert_equal(180000,prices.median)
    scores=[7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 12.0, 12.0, 13.0, 14.0, 14.0, 2.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0].to_scale
    {25=>4.5, 50=>6.5, 75=>9.5, 10=>3.0}.each do |pct,expected|
      assert_equal(expected, scores.percentil(pct))
    end
  end
  # Percentiles with the :linear strategy on shuffled samples of 10 and 11
  # values; each table maps a percentile to its expected value.
  def test_linear_percentil_strategy
    ten_values = [102, 104, 105, 107, 108, 109, 110, 112, 115, 116].shuffle.to_scale
    {0=>102, 25=>104.75, 50=>108.5, 75=>112.75, 100=>116}.each do |pct, expected|
      assert_equal expected, ten_values.percentil(pct, :linear)
    end

    eleven_values = [102, 104, 105, 107, 108, 109, 110, 112, 115, 116, 118].shuffle.to_scale
    {0=>102, 25=>105, 50=>109, 75=>115, 100=>118}.each do |pct, expected|
      assert_equal expected, eleven_values.percentil(pct, :linear)
    end
  end
  # Ranking with ties (the two 1.2 values share rank 2.5), with and without
  # nil entries.
  def test_ranked
    values=[0.8,1.2,1.2,2.3,18].to_vector(:ordinal)
    ranks=[1,2.5,2.5,4,5].to_vector(:ordinal)
    assert_equal(ranks,values.ranked)
    # nil entries are expected to stay nil in the ranked vector.
    values_with_nils=[nil,0.8,1.2,1.2,2.3,18,nil].to_vector(:ordinal)
    ranks_with_nils=[nil,1,2.5,2.5,4,5,nil].to_vector(:ordinal)
    assert_equal(ranks_with_nils,values_with_nils.ranked)
  end
  # A scale vector holding a String: the sum is 10 and the sorted factors
  # must match 0..4 in both value and class.
  def test_scale
    mixed=Statsample::Vector.new([1,2,3,4,"STRING"], :scale)
    assert_equal(10, mixed.sum)
    sorted_factors=mixed.factors.sort
    [0,1,2,3,4].each_with_index do |expected,position|
      found=sorted_factors[position]
      assert(expected==found)
      assert(expected.class==found.class,"#{expected} - #{expected.class} != #{found} - #{found.class}")
    end
  end
  # Shifts a symmetric vector (-5..5) by a random offset and checks that
  # #centered brings it back to the original values.
  def test_vector_centered
    offset=rand()
    size=11
    half=((size/2).floor).to_i
    symmetric=size.times.map {|position| position-half }.to_scale
    shifted=symmetric.recode {|value| value+offset }
    recentered=shifted.centered
    symmetric.each_with_index {|expected,position|
      assert_in_delta(expected,recentered[position],0.0001)
    }
  end
  # Standardizes [1,2,3,4,nil] and checks the z-scores, then that the result
  # has mean 0 and sample standard deviation 1.
  def test_vector_standarized
    v1=[1,2,3,4,nil].to_vector(:scale)
    sds=v1.sds
    expected=[((1-2.5).quo(sds)),((2-2.5).quo(sds)),((3-2.5).quo(sds)),((4-2.5).quo(sds)), nil].to_vector(:scale)
    vs=v1.vector_standarized
    assert_equal(expected, vs)
    # These are floating point results, so compare with a tight tolerance
    # rather than exact equality, which can fail on rounding differences.
    assert_in_delta(0,vs.mean,1e-12)
    assert_in_delta(1,vs.sds,1e-12)
  end

  # A constant vector has zero variance; standardizing it is expected to
  # give a vector of nils.
  def test_vector_standarized_with_zero_variance
    constant=100.times.map { 1 }.to_scale
    all_nil=100.times.map { nil }.to_scale
    assert_equal(all_nil,constant.standarized)
  end

    # check_type(level) must raise NoMethodError when the vector type does not
    # support the requested level and return nil when it does.
    def test_check_type
      v=Statsample::Vector.new
      # Each row: vector type, levels expected to raise, levels expected to
      # be accepted. Raising levels are checked before accepted ones.
      [
        [:nominal, [:scale,:ordinal],          [:nominal]],
        [:ordinal, [:scale],                   [:ordinal,:nominal]],
        [:scale,   [],                         [:scale,:ordinal,:nominal]],
        [:date,    [:scale,:ordinal,:nominal], []]
      ].each do |type,rejected,accepted|
        v.type=type
        rejected.each {|level| assert_raise(NoMethodError) { v.check_type(level)} }
        accepted.each {|level| assert(v.check_type(level).nil?) }
      end
    end

  # Vector#+ with a scalar, another vector, an Array, and invalid operands.
  def test_add
    small=Statsample::Vector.new([1,2,3,4,5], :scale)
    large=Statsample::Vector.new([11,12,13,14,15], :scale)
    assert_equal([3,4,5,6,7], (small+2).to_a)
    assert_equal([12,14,16,18,20], (small+large).to_a)
    # The fixture @c does not have the same size as the 5-element vector.
    assert_raise(ArgumentError) { small + @c }
    assert_raise(TypeError) { small+"string" }
    # A nil on either side gives nil at that position.
    left=Statsample::Vector.new([nil,1, 2  ,3 ,4 ,5], :scale)
    right=Statsample::Vector.new([11, 12,nil,13,14,15], :scale)
    sums=[nil,13,nil,16,18,20]
    assert_equal(sums, (left+right).to_a)
    assert_equal(sums, (left+right.to_a).to_a)
  end
  # Vector#- with a scalar, another vector, an Array, and invalid operands.
  def test_minus
    small=Statsample::Vector.new([1,2,3,4,5], :scale)
    large=Statsample::Vector.new([11,12,13,14,15], :scale)
    assert_equal([-1,0,1,2,3], (small-2).to_a)
    assert_equal([10,10,10,10,10], (large-small).to_a)
    # The fixture @c does not have the same size as the 5-element vector.
    assert_raise(ArgumentError) { small-@c }
    assert_raise(TypeError) { small-"string" }
    # A nil on either side gives nil at that position.
    subtrahend=Statsample::Vector.new([nil,1, 2  ,3 ,4 ,5], :scale)
    minuend=Statsample::Vector.new([11, 12,nil,13,14,15], :scale)
    differences=[nil,11,nil,10,10,10]
    assert_equal(differences, (minuend-subtrahend).to_a)
    assert_equal(differences, (minuend-subtrahend.to_a).to_a)
  end
  # Sum of squared deviations of 1..6 around its mean (3.5) is 17.5.
  def test_sum_of_squares
    values=(1..6).to_a.to_vector(:scale)
    assert_equal(17.5, values.sum_of_squared_deviation)
  end
  # Mean absolute deviation of 1..9 around its mean (5) is 20/9.
  def test_average_deviation
    values=(1..9).to_a.to_scale
    assert_equal(20.quo(9), values.average_deviation_population)
  end
  # Sampling with and without replacement, once with the fixture's original
  # type and once as :scale. Asking for more items than are available
  # without replacement must raise ArgumentError.
  def test_samples
    check_sampling=lambda do
      srand(1)
      assert_equal(100,@c.sample_with_replacement(100).size)
      # Taking all 15 valid items without replacement yields exactly them.
      assert_equal(@c.valid_data.to_a.sort, @c.sample_without_replacement(15).sort)
    end

    check_sampling.call
    assert_raise(ArgumentError) { @c.sample_without_replacement(20) }
    @c.type=:scale
    check_sampling.call
  end
  # valid_data must only change when set_valid_data is called after values
  # were added with add(value, false).
  def test_valid_data
    vector=Statsample::Vector.new([1,2,3,4,"STRING"])
    vector.missing_values=[-99]
    [1,2,-99].each {|value| vector.add(value,false) }
    vector.set_valid_data
    # -99 is declared missing, so it is left out of the valid data.
    first_snapshot=[1,2,3,4,"STRING",1,2]
    assert_equal(first_snapshot,vector.valid_data)
    [20,30].each {|value| vector.add(value,false) }
    # Not refreshed yet: still the previous snapshot.
    assert_equal(first_snapshot,vector.valid_data)
    vector.set_valid_data
    second_snapshot=[1,2,3,4,"STRING",1,2,20,30]
    assert_equal(second_snapshot,vector.valid_data)
  end
  # Assigning through []= replaces the element at that position.
  def test_set_value
    @c[2]=10
    after_assignment=[5,5,10,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99].to_vector
    assert_equal(after_assignment.data,@c.data)
  end
  # Compares the GSL-backed statistics with their pure-Ruby counterparts.
  # Without GSL the test is reported as skipped, matching the GSL::Vector
  # push test in this class, instead of passing with no assertions.
  def test_gsl
    skip("Requires GSL") unless Statsample.has_gsl?

    a=Statsample::Vector.new([1,2,3,4,"STRING"], :scale)

    assert_equal(2,a.mean)
    assert_equal(a.variance_sample_ruby,a.variance_sample)
    assert_equal(a.standard_deviation_sample_ruby,a.sds)
    assert_equal(a.variance_population_ruby,a.variance_population)
    assert_equal(a.standard_deviation_population_ruby,a.standard_deviation_population)
    # Building an empty scale vector must not raise.
    assert_nothing_raised do
      a=[].to_vector(:scale)
    end
    a.add(1,false)
    a.add(2,false)
    a.set_valid_data
    assert_equal(3,a.sum)
    # nil entries are left out of the sum, the mean and the gsl vector.
    b=[1,2,nil,3,4,5,nil,6].to_vector(:scale)
    assert_equal(21, b.sum)
    assert_equal(3.5, b.mean)
    assert_equal(6,b.gsl.size)
    c=[10,20,30,40,50,100,1000,2000,5000].to_scale
    assert_in_delta(c.skew,     c.skew_ruby     ,0.0001)
    assert_in_delta(c.kurtosis, c.kurtosis_ruby ,0.0001)
  end
  # Statsample.vector_cols_matrix puts each vector in its own column.
  def test_vector_matrix
    letters=%w{a a a b b b c c}.to_vector
    digits=%w{1 3 4 5 6 4 3 2}.to_vector
    flags=%w{1 0 0 0 1 1 1 0}.to_vector
    expected=Matrix.rows([
      ["a", "1", "1"], ["a", "3", "0"], ["a", "4", "0"], ["b", "5", "0"],
      ["b", "6", "1"], ["b", "4", "1"], ["c", "3", "1"], ["c", "2", "0"]
    ])
    assert_equal(expected,Statsample.vector_cols_matrix(letters,digits,flags))
  end
  # A vector of 101 random integers must survive a Marshal round trip.
  def test_marshalling
    original=Array.new(101) { rand(100) }.to_vector(:scale)
    restored=Marshal.load(Marshal.dump(original))
    assert_equal(original,restored)
  end
  # #dup and #dup_empty must return independent copies: equal metadata held
  # in distinct objects, and a type that can change on one side only.
  def test_dup
    # missing_values and labels must be equal in value but not shared.
    assert_separate_metadata=lambda do |source,copy|
      assert_equal(source.missing_values,copy.missing_values)
      assert_not_same(source.missing_values,copy.missing_values)
      assert_equal(source.labels,copy.labels)
      assert_not_same(source.labels,copy.labels)
    end

    original=%w{a a a b b b c c}.to_vector
    copy=original.dup
    assert_equal(original.data,copy.data)
    assert_not_same(original.data,copy.data)
    assert_equal(original.type,copy.type)

    original.type=:ordinal
    assert_not_equal(original.type,copy.type)
    assert_separate_metadata.call(original,copy)

    # dup_empty keeps the metadata but starts with no data.
    empty_copy=original.dup_empty
    assert_equal([],empty_copy.data)
    assert_not_equal(original.data,empty_copy.data)
    assert_not_same(original.data,empty_copy.data)
    assert_equal(original.type,empty_copy.type)
    original.type=:ordinal
    empty_copy.type=:nominal
    assert_not_equal(original.type,empty_copy.type)
    assert_separate_metadata.call(original,empty_copy)
  end
  # Tied values share the average of the ranks they span.
  def test_paired_ties
    tied=[0,0,0,1,1,2,3,3,4,4,4].to_vector(:ordinal)
    average_ranks=[2,2,2,4.5,4.5,6,7.5,7.5,10,10,10].to_vector(:ordinal)
    assert_equal(average_ranks,tied.ranked)
  end
  # #dichotomize with and without an explicit argument.
  # Each row: raw data, expected 0/1 data, arguments passed to dichotomize.
  def test_dichotomize
    [
      [[0,0,0,1,2,3,nil], [0,0,0,1,1,1,nil], []],
      [[1,1,1,2,2,2,3],   [0,0,0,1,1,1,1],   []],
      [[0,0,0,1,2,3,nil], [0,0,0,0,1,1,nil], [1]],
      [%w{a a a b c d},   [0,0,0,1,1,1],     []]
    ].each do |raw,dichotomous,arguments|
      source=raw.to_vector
      expected=dichotomous.to_scale
      assert_equal(expected,source.dichotomize(*arguments))
    end
  end
  # can_be_scale? and can_be_date? on convertible and non-convertible data.
  def test_can_be_methods
    numeric=  [0,0,0,1,2,3,nil].to_vector
    assert(numeric.can_be_scale?)
    with_string=[0,"s",0,1,2,3,nil].to_vector
    assert(!with_string.can_be_scale?)
    # Once "s" is declared missing it no longer blocks the conversion.
    with_string.missing_values=["s"]
    assert(with_string.can_be_scale?)

    date_like=[Date.new(2009,10,10), Date.today(), "2009-10-10", "2009-1-1", nil, "NOW"].to_vector
    assert(date_like.can_be_date?)
    not_date_like=[Date.new(2009,10,10), Date.today(),nil,"sss"].to_vector
    assert(!not_date_like.can_be_date?)
  end
  # A :date vector built from Date objects, date strings, :NOW/"NOW" markers,
  # nil and a declared missing value.
  def test_date_vector
    raw=[Date.new(2009,10,10), :NOW, "2009-10-10", "2009-1-1", nil, "NOW","MISSING"]
    dates=raw.to_vector(:date, :missing_values=>["MISSING"])

    assert(dates.type==:date)
    # :NOW and "NOW" are expected as today's date; nil and "MISSING" as nil.
    parsed=[Date.new(2009,10,10), Date.today(), Date.new(2009,10,10), Date.new(2009,1,1), nil, Date.today(), nil ]
    assert_equal(parsed, dates.date_data_with_nils)
  end
end


================================================
FILE: test/test_wilcoxonsignedrank.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))

# Tests for Statsample::Test::WilcoxonSignedRank on two paired samples.
# Each example checks W, the number of ranked pairs (nr), z, and the
# normal-approximation and exact probabilities.
# NOTE(review): the class name looks carried over from the Mann-Whitney U
# test file; it is kept unchanged here - confirm whether a rename is wanted.
class StatsampleUMannWhitneyTestCase < MiniTest::Unit::TestCase
  include Statsample::Test
  context Statsample::Test::WilcoxonSignedRank do
    context "Example 1" do
      setup do
        @v1=[110,122,125,120,140,124,123,137,135,145].to_scale
        @v2=[125,115,130,140,140,115,140,125,140,135].to_scale
        @u=Statsample::Test::WilcoxonSignedRank.new(@v1,@v2)
      end
      # The shortcut on Statsample::Test must agree with the class.
      should "have same result using class or Test#wilcoxon_signed_rank" do
        assert_equal(Statsample::Test.wilcoxon_signed_rank(@v1,@v2).w, @u.w)
      end
      should "have correct W values" do
        assert_equal(9,@u.w)
      end
      should "have correct nr values" do
        assert_equal(9,@u.nr)
      end
      should "have correct value for z" do
        assert_in_delta(0.503,@u.z,0.001)
      end
      should "have correct value for probability_z" do
        assert_in_delta(0.614,@u.probability_z,0.001)
      end
      should "have correct value for probability_exact" do
        assert_in_delta(0.652,@u.probability_exact,0.001)
      end
      should "have summary" do
        assert(@u.summary!="")
      end
    end

    context "Example 2" do
      setup do
        @v2=[78,24,64,45,64,52,30,50,64,50,78,22,84,40,90,72].to_scale
        @v1=[78,24,62,48,68,56,25,44,56,40,68,36,68,20,58,32].to_scale
        @u=Statsample::Test::WilcoxonSignedRank.new(@v1,@v2)
      end
      # The shortcut on Statsample::Test must agree with the class.
      should "have same result using class or Test#wilcoxon_signed_rank" do
        assert_equal(Statsample::Test.wilcoxon_signed_rank(@v1,@v2).w, @u.w)
      end
      should "have correct W values" do
        assert_equal(67,@u.w)
      end
      should "have correct nr values" do
        assert_equal(14,@u.nr)
      end
      should "have correct value for z" do
        assert_in_delta(2.087,@u.z,0.001)
      end
      should "have correct value for probability_z" do
        assert_in_delta(0.036,@u.probability_z,0.001)
      end
      should "have correct value for probability_exact" do
        assert_in_delta(0.036,@u.probability_exact,0.001)
      end
      should "have summary" do
        assert(@u.summary!="")
      end
    end
  end
end


================================================
FILE: test/test_xls.rb
================================================
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
# Tests for Statsample::Excel: reading the fixture workbook and a
# write-then-read round trip of a generated dataset.
class StatsampleExcelTestCase < MiniTest::Unit::TestCase
  context "Excel reader" do
    setup do
      @ds=Statsample::Excel.read(File.dirname(__FILE__)+"/fixtures/test_xls.xls")
    end
    should "set the number of cases" do
      assert_equal(6,@ds.cases)
    end
    should "set correct field names" do
      assert_equal(%w{id name age city a1},@ds.fields)
    end
    should "set a dataset equal to expected" do
      id=[1,2,3,4,5,6].to_vector(:scale)
      name=["Alex","Claude","Peter","Franz","George","Fernand"].to_vector(:nominal)
      age=[20,23,25,nil,5.5,nil].to_vector(:scale)
      city=["New York","London","London","Paris","Tome",nil].to_vector(:nominal)
      a1=["a,b","b,c","a",nil,"a,b,c",nil].to_vector(:nominal)
      ds_exp=Statsample::Dataset.new({'id'=>id,'name'=>name,'age'=>age,'city'=>city,'a1'=>a1}, %w{id name age city a1})
      # Compare field by field first so a failure points at the column.
      ds_exp.fields.each{|f|
        assert_equal(ds_exp[f],@ds[f])
      }
      assert_equal(ds_exp,@ds)
    end
    should "set to nil empty cells" do
      # assert_nil is the dedicated assertion for nil; newer Minitest
      # versions warn on (and later reject) assert_equal(nil, ...).
      assert_nil(@ds['age'][5])
    end
  end
  context "Excel writer" do
    setup do
      a=100.times.map{rand(100)}.to_scale
      b=(["b"]*100).to_vector
      @ds={'b'=>b, 'a'=>a}.to_dataset(%w{b a})
      tempfile=Tempfile.new("test_write.xls")
      begin
        Statsample::Excel.write(@ds,tempfile.path)
        @ds2=Statsample::Excel.read(tempfile.path)
      ensure
        # The file is only needed during setup; close and unlink it now
        # instead of leaving that to the Tempfile finalizer.
        tempfile.close!
      end
    end
    should "return same fields as original" do
      assert_equal(@ds.fields ,@ds2.fields)
    end
    should "return same number of cases as original" do
      assert_equal(@ds.cases, @ds2.cases)
    end
    should "return same cases as original" do
      # Walk the re-read rows and compare each with the original case at
      # the same position.
      i=0
      @ds2.each_array do |row|
        assert_equal(@ds.case_as_array(i),row)
        i+=1
      end
    end
  end
end


================================================
FILE: web/Rakefile
================================================
# -*- ruby -*-
require 'rake'
require 'fileutils'
directory "examples"

# Turns the path of an example script into a flat base name: strips the
# "<this dir>/../examples/" prefix, replaces "/" with "_" and removes ".rb".
def get_base(f)
  examples_prefix = "#{File.dirname(__FILE__)}/../examples/"
  relative = f.sub(examples_prefix, "")
  relative.tr("/", "_").gsub(".rb", "")
end


# [path, base name] pairs for every Ruby script under ../examples, leaving
# out any path that contains "_data".
EXAMPLES = Dir.glob(File.dirname(__FILE__) + "/../examples/**/*.rb").
  reject { |path| path.include?("_data") }.
  map { |path| [path, get_base(path)] }

# Base names only, in the same order as EXAMPLES.
EXAMPLES_BASE = EXAMPLES.map { |_path, base| base }


# Runs build_site.rb with the current Ruby interpreter.
desc "Build all html, rtf and pdf files"
task(:build_site) { ruby "build_site.rb" }


# Removes the pdf, html and rtf files found in examples/ and then every
# file in examples/images/.
task :clean do
  examples_dir = File.dirname(__FILE__) + "/examples"
  %w{*.pdf *.html *.rtf images/*.*}.each do |pattern|
    Dir.glob("#{examples_dir}/#{pattern}").each do |generated|
      FileUtils.rm generated
    end
  end
end


# Optional local tasks: loaded only when upload_task.rb is present in the
# current directory. File.exist? is used because the File.exists? alias was
# removed in Ruby 3.2.
load 'upload_task.rb' if File.exist? "upload_task.rb"