Copy disabled (too large)
Download .txt
Showing preview only (15,257K chars total). Download the full file to get everything.
Repository: norbusan/scikit-learn
Branch: master
Commit: 248f6cf3156f
Files: 1269
Total size: 14.3 MB
Directory structure:
gitextract_8esimy8a/
├── .binder/
│ ├── postBuild
│ └── requirements.txt
├── .circleci/
│ ├── artifact_path
│ └── config.yml
├── .codecov.yml
├── .coveragerc
├── .git-blame-ignore-revs
├── .gitattributes
├── .github/
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.yml
│ │ ├── config.yml
│ │ ├── doc_improvement.yml
│ │ └── feature_request.yml
│ ├── PULL_REQUEST_TEMPLATE.md
│ ├── labeler-file-extensions.yml
│ ├── labeler-module.yml
│ ├── scripts/
│ │ └── label_title_regex.py
│ └── workflows/
│ ├── assign.yml
│ ├── check-changelog.yml
│ ├── check-manifest.yml
│ ├── labeler-module.yml
│ ├── labeler-title-regex.yml
│ ├── publish_pypi.yml
│ ├── twitter.yml
│ ├── unassign.yml
│ └── wheels.yml
├── .gitignore
├── .mailmap
├── .pre-commit-config.yaml
├── .travis.yml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── COPYING
├── MANIFEST.in
├── Makefile
├── README.rst
├── SECURITY.md
├── asv_benchmarks/
│ ├── .gitignore
│ ├── asv.conf.json
│ └── benchmarks/
│ ├── __init__.py
│ ├── cluster.py
│ ├── common.py
│ ├── config.json
│ ├── datasets.py
│ ├── decomposition.py
│ ├── ensemble.py
│ ├── linear_model.py
│ ├── manifold.py
│ ├── metrics.py
│ ├── model_selection.py
│ ├── neighbors.py
│ ├── svm.py
│ └── utils.py
├── azure-pipelines.yml
├── benchmarks/
│ ├── .gitignore
│ ├── bench_20newsgroups.py
│ ├── bench_covertype.py
│ ├── bench_feature_expansions.py
│ ├── bench_glm.py
│ ├── bench_glmnet.py
│ ├── bench_hist_gradient_boosting.py
│ ├── bench_hist_gradient_boosting_adult.py
│ ├── bench_hist_gradient_boosting_categorical_only.py
│ ├── bench_hist_gradient_boosting_higgsboson.py
│ ├── bench_hist_gradient_boosting_threading.py
│ ├── bench_isolation_forest.py
│ ├── bench_isotonic.py
│ ├── bench_kernel_pca_solvers_time_vs_n_components.py
│ ├── bench_kernel_pca_solvers_time_vs_n_samples.py
│ ├── bench_lasso.py
│ ├── bench_lof.py
│ ├── bench_mnist.py
│ ├── bench_multilabel_metrics.py
│ ├── bench_online_ocsvm.py
│ ├── bench_plot_fastkmeans.py
│ ├── bench_plot_hierarchical.py
│ ├── bench_plot_incremental_pca.py
│ ├── bench_plot_lasso_path.py
│ ├── bench_plot_neighbors.py
│ ├── bench_plot_nmf.py
│ ├── bench_plot_omp_lars.py
│ ├── bench_plot_parallel_pairwise.py
│ ├── bench_plot_polynomial_kernel_approximation.py
│ ├── bench_plot_randomized_svd.py
│ ├── bench_plot_svd.py
│ ├── bench_plot_ward.py
│ ├── bench_random_projections.py
│ ├── bench_rcv1_logreg_convergence.py
│ ├── bench_saga.py
│ ├── bench_sample_without_replacement.py
│ ├── bench_sgd_regression.py
│ ├── bench_sparsify.py
│ ├── bench_text_vectorizers.py
│ ├── bench_tree.py
│ ├── bench_tsne_mnist.py
│ └── plot_tsne_mnist.py
├── build_tools/
│ ├── Makefile
│ ├── azure/
│ │ ├── install.sh
│ │ ├── install_win.sh
│ │ ├── posix-docker.yml
│ │ ├── posix.yml
│ │ ├── test_docs.sh
│ │ ├── test_docstring.sh
│ │ ├── test_pytest_soft_dependency.sh
│ │ ├── test_script.sh
│ │ ├── upload_codecov.sh
│ │ └── windows.yml
│ ├── circle/
│ │ ├── build_doc.sh
│ │ ├── build_test_arm.sh
│ │ ├── build_test_pypy.sh
│ │ ├── checkout_merge_commit.sh
│ │ ├── linting.sh
│ │ ├── list_versions.py
│ │ └── push_doc.sh
│ ├── codespell_ignore_words.txt
│ ├── generate_authors_table.py
│ ├── github/
│ │ ├── Windows
│ │ ├── build_minimal_windows_image.sh
│ │ ├── build_source.sh
│ │ ├── build_wheels.sh
│ │ ├── check_build_trigger.sh
│ │ ├── check_wheels.py
│ │ ├── repair_windows_wheels.sh
│ │ ├── test_source.sh
│ │ ├── test_wheels.sh
│ │ ├── test_windows_wheels.sh
│ │ ├── upload_anaconda.sh
│ │ └── vendor.py
│ ├── shared.sh
│ └── travis/
│ ├── after_success.sh
│ ├── install.sh
│ ├── install_main.sh
│ ├── install_wheels.sh
│ ├── script.sh
│ ├── test_docs.sh
│ ├── test_script.sh
│ └── test_wheels.sh
├── conftest.py
├── doc/
│ ├── Makefile
│ ├── README.md
│ ├── about.rst
│ ├── authors.rst
│ ├── authors_emeritus.rst
│ ├── binder/
│ │ └── requirements.txt
│ ├── common_pitfalls.rst
│ ├── communication_team.rst
│ ├── computing/
│ │ ├── computational_performance.rst
│ │ ├── parallelism.rst
│ │ └── scaling_strategies.rst
│ ├── computing.rst
│ ├── conf.py
│ ├── conftest.py
│ ├── contents.rst
│ ├── data_transforms.rst
│ ├── datasets/
│ │ ├── loading_other_datasets.rst
│ │ ├── real_world.rst
│ │ ├── sample_generators.rst
│ │ └── toy_dataset.rst
│ ├── datasets.rst
│ ├── developers/
│ │ ├── advanced_installation.rst
│ │ ├── bug_triaging.rst
│ │ ├── contributing.rst
│ │ ├── develop.rst
│ │ ├── index.rst
│ │ ├── maintainer.rst
│ │ ├── performance.rst
│ │ ├── plotting.rst
│ │ ├── tips.rst
│ │ └── utilities.rst
│ ├── faq.rst
│ ├── getting_started.rst
│ ├── glossary.rst
│ ├── governance.rst
│ ├── includes/
│ │ ├── big_toc_css.rst
│ │ └── bigger_toc_css.rst
│ ├── inspection.rst
│ ├── install.rst
│ ├── make.bat
│ ├── model_selection.rst
│ ├── modules/
│ │ ├── biclustering.rst
│ │ ├── calibration.rst
│ │ ├── classes.rst
│ │ ├── clustering.rst
│ │ ├── compose.rst
│ │ ├── covariance.rst
│ │ ├── cross_decomposition.rst
│ │ ├── cross_validation.rst
│ │ ├── decomposition.rst
│ │ ├── density.rst
│ │ ├── ensemble.rst
│ │ ├── feature_extraction.rst
│ │ ├── feature_selection.rst
│ │ ├── gaussian_process.rst
│ │ ├── grid_search.rst
│ │ ├── impute.rst
│ │ ├── isotonic.rst
│ │ ├── kernel_approximation.rst
│ │ ├── kernel_ridge.rst
│ │ ├── lda_qda.rst
│ │ ├── learning_curve.rst
│ │ ├── linear_model.rst
│ │ ├── manifold.rst
│ │ ├── metrics.rst
│ │ ├── mixture.rst
│ │ ├── model_evaluation.rst
│ │ ├── model_persistence.rst
│ │ ├── multiclass.rst
│ │ ├── naive_bayes.rst
│ │ ├── neighbors.rst
│ │ ├── neural_networks_supervised.rst
│ │ ├── neural_networks_unsupervised.rst
│ │ ├── outlier_detection.rst
│ │ ├── partial_dependence.rst
│ │ ├── permutation_importance.rst
│ │ ├── pipeline.rst
│ │ ├── preprocessing.rst
│ │ ├── preprocessing_targets.rst
│ │ ├── random_projection.rst
│ │ ├── semi_supervised.rst
│ │ ├── sgd.rst
│ │ ├── svm.rst
│ │ ├── tree.rst
│ │ └── unsupervised_reduction.rst
│ ├── preface.rst
│ ├── presentations.rst
│ ├── related_projects.rst
│ ├── roadmap.rst
│ ├── sphinxext/
│ │ ├── MANIFEST.in
│ │ ├── add_toctree_functions.py
│ │ ├── custom_references_resolver.py
│ │ ├── doi_role.py
│ │ ├── github_link.py
│ │ └── sphinx_issues.py
│ ├── supervised_learning.rst
│ ├── support.rst
│ ├── templates/
│ │ ├── class.rst
│ │ ├── class_with_call.rst
│ │ ├── deprecated_class.rst
│ │ ├── deprecated_class_with_call.rst
│ │ ├── deprecated_class_without_init.rst
│ │ ├── deprecated_function.rst
│ │ ├── function.rst
│ │ ├── generate_deprecated.sh
│ │ ├── index.html
│ │ ├── numpydoc_docstring.rst
│ │ └── redirects.html
│ ├── testimonials/
│ │ ├── README.txt
│ │ ├── images/
│ │ │ └── Makefile
│ │ └── testimonials.rst
│ ├── themes/
│ │ └── scikit-learn-modern/
│ │ ├── javascript.html
│ │ ├── layout.html
│ │ ├── nav.html
│ │ ├── search.html
│ │ ├── static/
│ │ │ ├── css/
│ │ │ │ └── theme.css
│ │ │ └── js/
│ │ │ └── searchtools.js
│ │ └── theme.conf
│ ├── triage_team.rst
│ ├── tune_toc.rst
│ ├── tutorial/
│ │ ├── basic/
│ │ │ └── tutorial.rst
│ │ ├── common_includes/
│ │ │ └── info.txt
│ │ ├── index.rst
│ │ ├── machine_learning_map/
│ │ │ ├── ML_MAPS_README.txt
│ │ │ ├── index.rst
│ │ │ ├── parse_path.py
│ │ │ ├── pyparsing.py
│ │ │ └── svg2imagemap.py
│ │ ├── statistical_inference/
│ │ │ ├── index.rst
│ │ │ ├── model_selection.rst
│ │ │ ├── putting_together.rst
│ │ │ ├── settings.rst
│ │ │ ├── supervised_learning.rst
│ │ │ └── unsupervised_learning.rst
│ │ └── text_analytics/
│ │ ├── .gitignore
│ │ ├── data/
│ │ │ ├── languages/
│ │ │ │ └── fetch_data.py
│ │ │ ├── movie_reviews/
│ │ │ │ └── fetch_data.py
│ │ │ └── twenty_newsgroups/
│ │ │ └── fetch_data.py
│ │ ├── skeletons/
│ │ │ ├── exercise_01_language_train_model.py
│ │ │ └── exercise_02_sentiment.py
│ │ ├── solutions/
│ │ │ ├── exercise_01_language_train_model.py
│ │ │ ├── exercise_02_sentiment.py
│ │ │ └── generate_skeletons.py
│ │ └── working_with_text_data.rst
│ ├── unsupervised_learning.rst
│ ├── user_guide.rst
│ ├── visualizations.rst
│ ├── whats_new/
│ │ ├── _contributors.rst
│ │ ├── changelog_legend.inc
│ │ ├── older_versions.rst
│ │ ├── v0.13.rst
│ │ ├── v0.14.rst
│ │ ├── v0.15.rst
│ │ ├── v0.16.rst
│ │ ├── v0.17.rst
│ │ ├── v0.18.rst
│ │ ├── v0.19.rst
│ │ ├── v0.20.rst
│ │ ├── v0.21.rst
│ │ ├── v0.22.rst
│ │ ├── v0.23.rst
│ │ ├── v0.24.rst
│ │ ├── v1.0.rst
│ │ └── v1.1.rst
│ └── whats_new.rst
├── examples/
│ ├── README.txt
│ ├── applications/
│ │ ├── README.txt
│ │ ├── plot_cyclical_feature_engineering.py
│ │ ├── plot_digits_denoising.py
│ │ ├── plot_face_recognition.py
│ │ ├── plot_model_complexity_influence.py
│ │ ├── plot_out_of_core_classification.py
│ │ ├── plot_outlier_detection_wine.py
│ │ ├── plot_prediction_latency.py
│ │ ├── plot_species_distribution_modeling.py
│ │ ├── plot_stock_market.py
│ │ ├── plot_tomography_l1_reconstruction.py
│ │ ├── plot_topics_extraction_with_nmf_lda.py
│ │ ├── svm_gui.py
│ │ └── wikipedia_principal_eigenvector.py
│ ├── bicluster/
│ │ ├── README.txt
│ │ ├── plot_bicluster_newsgroups.py
│ │ ├── plot_spectral_biclustering.py
│ │ └── plot_spectral_coclustering.py
│ ├── calibration/
│ │ ├── README.txt
│ │ ├── plot_calibration.py
│ │ ├── plot_calibration_curve.py
│ │ ├── plot_calibration_multiclass.py
│ │ └── plot_compare_calibration.py
│ ├── classification/
│ │ ├── README.txt
│ │ ├── plot_classification_probability.py
│ │ ├── plot_classifier_comparison.py
│ │ ├── plot_digits_classification.py
│ │ ├── plot_lda.py
│ │ └── plot_lda_qda.py
│ ├── cluster/
│ │ ├── README.txt
│ │ ├── plot_adjusted_for_chance_measures.py
│ │ ├── plot_affinity_propagation.py
│ │ ├── plot_agglomerative_clustering.py
│ │ ├── plot_agglomerative_clustering_metrics.py
│ │ ├── plot_agglomerative_dendrogram.py
│ │ ├── plot_birch_vs_minibatchkmeans.py
│ │ ├── plot_cluster_comparison.py
│ │ ├── plot_cluster_iris.py
│ │ ├── plot_coin_segmentation.py
│ │ ├── plot_coin_ward_segmentation.py
│ │ ├── plot_color_quantization.py
│ │ ├── plot_dbscan.py
│ │ ├── plot_dict_face_patches.py
│ │ ├── plot_digits_agglomeration.py
│ │ ├── plot_digits_linkage.py
│ │ ├── plot_face_compress.py
│ │ ├── plot_feature_agglomeration_vs_univariate_selection.py
│ │ ├── plot_inductive_clustering.py
│ │ ├── plot_kmeans_assumptions.py
│ │ ├── plot_kmeans_digits.py
│ │ ├── plot_kmeans_plusplus.py
│ │ ├── plot_kmeans_silhouette_analysis.py
│ │ ├── plot_kmeans_stability_low_dim_dense.py
│ │ ├── plot_linkage_comparison.py
│ │ ├── plot_mean_shift.py
│ │ ├── plot_mini_batch_kmeans.py
│ │ ├── plot_optics.py
│ │ ├── plot_segmentation_toy.py
│ │ └── plot_ward_structured_vs_unstructured.py
│ ├── compose/
│ │ ├── README.txt
│ │ ├── plot_column_transformer.py
│ │ ├── plot_column_transformer_mixed_types.py
│ │ ├── plot_compare_reduction.py
│ │ ├── plot_digits_pipe.py
│ │ ├── plot_feature_union.py
│ │ └── plot_transformed_target.py
│ ├── covariance/
│ │ ├── README.txt
│ │ ├── plot_covariance_estimation.py
│ │ ├── plot_lw_vs_oas.py
│ │ ├── plot_mahalanobis_distances.py
│ │ ├── plot_robust_vs_empirical_covariance.py
│ │ └── plot_sparse_cov.py
│ ├── cross_decomposition/
│ │ ├── README.txt
│ │ ├── plot_compare_cross_decomposition.py
│ │ └── plot_pcr_vs_pls.py
│ ├── datasets/
│ │ ├── README.txt
│ │ ├── plot_digits_last_image.py
│ │ ├── plot_iris_dataset.py
│ │ ├── plot_random_dataset.py
│ │ └── plot_random_multilabel_dataset.py
│ ├── decomposition/
│ │ ├── README.txt
│ │ ├── plot_beta_divergence.py
│ │ ├── plot_faces_decomposition.py
│ │ ├── plot_ica_blind_source_separation.py
│ │ ├── plot_ica_vs_pca.py
│ │ ├── plot_image_denoising.py
│ │ ├── plot_incremental_pca.py
│ │ ├── plot_kernel_pca.py
│ │ ├── plot_pca_3d.py
│ │ ├── plot_pca_iris.py
│ │ ├── plot_pca_vs_fa_model_selection.py
│ │ ├── plot_pca_vs_lda.py
│ │ ├── plot_sparse_coding.py
│ │ └── plot_varimax_fa.py
│ ├── ensemble/
│ │ ├── README.txt
│ │ ├── plot_adaboost_hastie_10_2.py
│ │ ├── plot_adaboost_multiclass.py
│ │ ├── plot_adaboost_regression.py
│ │ ├── plot_adaboost_twoclass.py
│ │ ├── plot_bias_variance.py
│ │ ├── plot_ensemble_oob.py
│ │ ├── plot_feature_transformation.py
│ │ ├── plot_forest_importances.py
│ │ ├── plot_forest_importances_faces.py
│ │ ├── plot_forest_iris.py
│ │ ├── plot_gradient_boosting_categorical.py
│ │ ├── plot_gradient_boosting_early_stopping.py
│ │ ├── plot_gradient_boosting_oob.py
│ │ ├── plot_gradient_boosting_quantile.py
│ │ ├── plot_gradient_boosting_regression.py
│ │ ├── plot_gradient_boosting_regularization.py
│ │ ├── plot_isolation_forest.py
│ │ ├── plot_monotonic_constraints.py
│ │ ├── plot_random_forest_embedding.py
│ │ ├── plot_random_forest_regression_multioutput.py
│ │ ├── plot_stack_predictors.py
│ │ ├── plot_voting_decision_regions.py
│ │ ├── plot_voting_probas.py
│ │ └── plot_voting_regressor.py
│ ├── exercises/
│ │ ├── README.txt
│ │ ├── plot_cv_diabetes.py
│ │ ├── plot_cv_digits.py
│ │ ├── plot_digits_classification_exercise.py
│ │ └── plot_iris_exercise.py
│ ├── feature_selection/
│ │ ├── README.txt
│ │ ├── plot_f_test_vs_mi.py
│ │ ├── plot_feature_selection.py
│ │ ├── plot_feature_selection_pipeline.py
│ │ ├── plot_rfe_digits.py
│ │ ├── plot_rfe_with_cross_validation.py
│ │ └── plot_select_from_model_diabetes.py
│ ├── gaussian_process/
│ │ ├── README.txt
│ │ ├── plot_compare_gpr_krr.py
│ │ ├── plot_gpc.py
│ │ ├── plot_gpc_iris.py
│ │ ├── plot_gpc_isoprobability.py
│ │ ├── plot_gpc_xor.py
│ │ ├── plot_gpr_co2.py
│ │ ├── plot_gpr_noisy.py
│ │ ├── plot_gpr_noisy_targets.py
│ │ ├── plot_gpr_on_structured_data.py
│ │ └── plot_gpr_prior_posterior.py
│ ├── impute/
│ │ ├── README.txt
│ │ ├── plot_iterative_imputer_variants_comparison.py
│ │ └── plot_missing_values.py
│ ├── inspection/
│ │ ├── README.txt
│ │ ├── plot_linear_model_coefficient_interpretation.py
│ │ ├── plot_partial_dependence.py
│ │ ├── plot_permutation_importance.py
│ │ └── plot_permutation_importance_multicollinear.py
│ ├── kernel_approximation/
│ │ ├── README.txt
│ │ └── plot_scalable_poly_kernels.py
│ ├── linear_model/
│ │ ├── README.txt
│ │ ├── plot_ard.py
│ │ ├── plot_bayesian_ridge.py
│ │ ├── plot_bayesian_ridge_curvefit.py
│ │ ├── plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py
│ │ ├── plot_huber_vs_ridge.py
│ │ ├── plot_iris_logistic.py
│ │ ├── plot_lasso_and_elasticnet.py
│ │ ├── plot_lasso_coordinate_descent_path.py
│ │ ├── plot_lasso_dense_vs_sparse_data.py
│ │ ├── plot_lasso_lars.py
│ │ ├── plot_lasso_model_selection.py
│ │ ├── plot_logistic.py
│ │ ├── plot_logistic_l1_l2_sparsity.py
│ │ ├── plot_logistic_multinomial.py
│ │ ├── plot_logistic_path.py
│ │ ├── plot_multi_task_lasso_support.py
│ │ ├── plot_nnls.py
│ │ ├── plot_ols.py
│ │ ├── plot_ols_3d.py
│ │ ├── plot_ols_ridge_variance.py
│ │ ├── plot_omp.py
│ │ ├── plot_poisson_regression_non_normal_loss.py
│ │ ├── plot_polynomial_interpolation.py
│ │ ├── plot_quantile_regression.py
│ │ ├── plot_ransac.py
│ │ ├── plot_ridge_coeffs.py
│ │ ├── plot_ridge_path.py
│ │ ├── plot_robust_fit.py
│ │ ├── plot_sgd_comparison.py
│ │ ├── plot_sgd_early_stopping.py
│ │ ├── plot_sgd_iris.py
│ │ ├── plot_sgd_loss_functions.py
│ │ ├── plot_sgd_penalties.py
│ │ ├── plot_sgd_separating_hyperplane.py
│ │ ├── plot_sgd_weighted_samples.py
│ │ ├── plot_sgdocsvm_vs_ocsvm.py
│ │ ├── plot_sparse_logistic_regression_20newsgroups.py
│ │ ├── plot_sparse_logistic_regression_mnist.py
│ │ ├── plot_theilsen.py
│ │ └── plot_tweedie_regression_insurance_claims.py
│ ├── manifold/
│ │ ├── README.txt
│ │ ├── plot_compare_methods.py
│ │ ├── plot_lle_digits.py
│ │ ├── plot_manifold_sphere.py
│ │ ├── plot_mds.py
│ │ ├── plot_swissroll.py
│ │ └── plot_t_sne_perplexity.py
│ ├── miscellaneous/
│ │ ├── README.txt
│ │ ├── plot_anomaly_comparison.py
│ │ ├── plot_changed_only_pprint_parameter.py
│ │ ├── plot_display_object_visualization.py
│ │ ├── plot_isotonic_regression.py
│ │ ├── plot_johnson_lindenstrauss_bound.py
│ │ ├── plot_kernel_approximation.py
│ │ ├── plot_kernel_ridge_regression.py
│ │ ├── plot_multilabel.py
│ │ ├── plot_multioutput_face_completion.py
│ │ ├── plot_partial_dependence_visualization_api.py
│ │ ├── plot_pipeline_display.py
│ │ └── plot_roc_curve_visualization_api.py
│ ├── mixture/
│ │ ├── README.txt
│ │ ├── plot_concentration_prior.py
│ │ ├── plot_gmm.py
│ │ ├── plot_gmm_covariances.py
│ │ ├── plot_gmm_pdf.py
│ │ ├── plot_gmm_selection.py
│ │ └── plot_gmm_sin.py
│ ├── model_selection/
│ │ ├── README.txt
│ │ ├── grid_search_text_feature_extraction.py
│ │ ├── plot_confusion_matrix.py
│ │ ├── plot_cv_indices.py
│ │ ├── plot_cv_predict.py
│ │ ├── plot_det.py
│ │ ├── plot_grid_search_digits.py
│ │ ├── plot_grid_search_refit_callable.py
│ │ ├── plot_grid_search_stats.py
│ │ ├── plot_learning_curve.py
│ │ ├── plot_multi_metric_evaluation.py
│ │ ├── plot_nested_cross_validation_iris.py
│ │ ├── plot_permutation_tests_for_classification.py
│ │ ├── plot_precision_recall.py
│ │ ├── plot_randomized_search.py
│ │ ├── plot_roc.py
│ │ ├── plot_roc_crossval.py
│ │ ├── plot_successive_halving_heatmap.py
│ │ ├── plot_successive_halving_iterations.py
│ │ ├── plot_train_error_vs_test_error.py
│ │ ├── plot_underfitting_overfitting.py
│ │ └── plot_validation_curve.py
│ ├── multioutput/
│ │ ├── README.txt
│ │ └── plot_classifier_chain_yeast.py
│ ├── neighbors/
│ │ ├── README.txt
│ │ ├── approximate_nearest_neighbors.py
│ │ ├── plot_caching_nearest_neighbors.py
│ │ ├── plot_classification.py
│ │ ├── plot_digits_kde_sampling.py
│ │ ├── plot_kde_1d.py
│ │ ├── plot_lof_novelty_detection.py
│ │ ├── plot_lof_outlier_detection.py
│ │ ├── plot_nca_classification.py
│ │ ├── plot_nca_dim_reduction.py
│ │ ├── plot_nca_illustration.py
│ │ ├── plot_nearest_centroid.py
│ │ ├── plot_regression.py
│ │ └── plot_species_kde.py
│ ├── neural_networks/
│ │ ├── README.txt
│ │ ├── plot_mlp_alpha.py
│ │ ├── plot_mlp_training_curves.py
│ │ ├── plot_mnist_filters.py
│ │ └── plot_rbm_logistic_classification.py
│ ├── preprocessing/
│ │ ├── README.txt
│ │ ├── plot_all_scaling.py
│ │ ├── plot_discretization.py
│ │ ├── plot_discretization_classification.py
│ │ ├── plot_discretization_strategies.py
│ │ ├── plot_map_data_to_normal.py
│ │ └── plot_scaling_importance.py
│ ├── release_highlights/
│ │ ├── README.txt
│ │ ├── plot_release_highlights_0_22_0.py
│ │ ├── plot_release_highlights_0_23_0.py
│ │ ├── plot_release_highlights_0_24_0.py
│ │ └── plot_release_highlights_1_0_0.py
│ ├── semi_supervised/
│ │ ├── README.txt
│ │ ├── plot_label_propagation_digits.py
│ │ ├── plot_label_propagation_digits_active_learning.py
│ │ ├── plot_label_propagation_structure.py
│ │ ├── plot_self_training_varying_threshold.py
│ │ ├── plot_semi_supervised_newsgroups.py
│ │ └── plot_semi_supervised_versus_svm_iris.py
│ ├── svm/
│ │ ├── README.txt
│ │ ├── plot_custom_kernel.py
│ │ ├── plot_iris_svc.py
│ │ ├── plot_linearsvc_support_vectors.py
│ │ ├── plot_oneclass.py
│ │ ├── plot_rbf_parameters.py
│ │ ├── plot_separating_hyperplane.py
│ │ ├── plot_separating_hyperplane_unbalanced.py
│ │ ├── plot_svm_anova.py
│ │ ├── plot_svm_kernels.py
│ │ ├── plot_svm_margin.py
│ │ ├── plot_svm_nonlinear.py
│ │ ├── plot_svm_regression.py
│ │ ├── plot_svm_scale_c.py
│ │ ├── plot_svm_tie_breaking.py
│ │ └── plot_weighted_samples.py
│ ├── text/
│ │ ├── README.txt
│ │ ├── plot_document_classification_20newsgroups.py
│ │ ├── plot_document_clustering.py
│ │ └── plot_hashing_vs_dict_vectorizer.py
│ └── tree/
│ ├── README.txt
│ ├── plot_cost_complexity_pruning.py
│ ├── plot_iris_dtc.py
│ ├── plot_tree_regression.py
│ ├── plot_tree_regression_multioutput.py
│ └── plot_unveil_tree_structure.py
├── lgtm.yml
├── maint_tools/
│ ├── check_pxd_in_installation.py
│ ├── create_issue_from_juint.py
│ ├── sort_whats_new.py
│ ├── test_docstrings.py
│ └── whats_missing.sh
├── pyproject.toml
├── setup.cfg
├── setup.py
└── sklearn/
├── __check_build/
│ ├── __init__.py
│ ├── _check_build.pyx
│ └── setup.py
├── __init__.py
├── _build_utils/
│ ├── __init__.py
│ ├── openmp_helpers.py
│ └── pre_build_helpers.py
├── _config.py
├── _distributor_init.py
├── _isotonic.pyx
├── _loss/
│ ├── __init__.py
│ ├── glm_distribution.py
│ └── tests/
│ ├── __init__.py
│ └── test_glm_distribution.py
├── _min_dependencies.py
├── base.py
├── calibration.py
├── cluster/
│ ├── __init__.py
│ ├── _affinity_propagation.py
│ ├── _agglomerative.py
│ ├── _bicluster.py
│ ├── _birch.py
│ ├── _dbscan.py
│ ├── _dbscan_inner.pyx
│ ├── _feature_agglomeration.py
│ ├── _hierarchical_fast.pyx
│ ├── _k_means_common.pxd
│ ├── _k_means_common.pyx
│ ├── _k_means_elkan.pyx
│ ├── _k_means_lloyd.pyx
│ ├── _k_means_minibatch.pyx
│ ├── _kmeans.py
│ ├── _mean_shift.py
│ ├── _optics.py
│ ├── _spectral.py
│ ├── setup.py
│ └── tests/
│ ├── __init__.py
│ ├── common.py
│ ├── test_affinity_propagation.py
│ ├── test_bicluster.py
│ ├── test_birch.py
│ ├── test_dbscan.py
│ ├── test_feature_agglomeration.py
│ ├── test_hierarchical.py
│ ├── test_k_means.py
│ ├── test_mean_shift.py
│ ├── test_optics.py
│ └── test_spectral.py
├── compose/
│ ├── __init__.py
│ ├── _column_transformer.py
│ ├── _target.py
│ └── tests/
│ ├── __init__.py
│ ├── test_column_transformer.py
│ └── test_target.py
├── conftest.py
├── covariance/
│ ├── __init__.py
│ ├── _elliptic_envelope.py
│ ├── _empirical_covariance.py
│ ├── _graph_lasso.py
│ ├── _robust_covariance.py
│ ├── _shrunk_covariance.py
│ └── tests/
│ ├── __init__.py
│ ├── test_covariance.py
│ ├── test_elliptic_envelope.py
│ ├── test_graphical_lasso.py
│ └── test_robust_covariance.py
├── cross_decomposition/
│ ├── __init__.py
│ ├── _pls.py
│ └── tests/
│ ├── __init__.py
│ └── test_pls.py
├── datasets/
│ ├── __init__.py
│ ├── _base.py
│ ├── _california_housing.py
│ ├── _covtype.py
│ ├── _kddcup99.py
│ ├── _lfw.py
│ ├── _olivetti_faces.py
│ ├── _openml.py
│ ├── _rcv1.py
│ ├── _samples_generator.py
│ ├── _species_distributions.py
│ ├── _svmlight_format_fast.pyx
│ ├── _svmlight_format_io.py
│ ├── _twenty_newsgroups.py
│ ├── data/
│ │ ├── __init__.py
│ │ ├── boston_house_prices.csv
│ │ ├── breast_cancer.csv
│ │ ├── iris.csv
│ │ ├── linnerud_exercise.csv
│ │ ├── linnerud_physiological.csv
│ │ └── wine_data.csv
│ ├── descr/
│ │ ├── __init__.py
│ │ ├── boston_house_prices.rst
│ │ ├── breast_cancer.rst
│ │ ├── california_housing.rst
│ │ ├── covtype.rst
│ │ ├── diabetes.rst
│ │ ├── digits.rst
│ │ ├── iris.rst
│ │ ├── kddcup99.rst
│ │ ├── lfw.rst
│ │ ├── linnerud.rst
│ │ ├── olivetti_faces.rst
│ │ ├── rcv1.rst
│ │ ├── twenty_newsgroups.rst
│ │ └── wine_data.rst
│ ├── images/
│ │ ├── README.txt
│ │ └── __init__.py
│ ├── setup.py
│ └── tests/
│ ├── __init__.py
│ ├── conftest.py
│ ├── data/
│ │ ├── __init__.py
│ │ ├── openml/
│ │ │ ├── __init__.py
│ │ │ ├── id_1/
│ │ │ │ └── __init__.py
│ │ │ ├── id_1119/
│ │ │ │ └── __init__.py
│ │ │ ├── id_2/
│ │ │ │ └── __init__.py
│ │ │ ├── id_292/
│ │ │ │ └── __init__.py
│ │ │ ├── id_3/
│ │ │ │ └── __init__.py
│ │ │ ├── id_40589/
│ │ │ │ └── __init__.py
│ │ │ ├── id_40675/
│ │ │ │ └── __init__.py
│ │ │ ├── id_40945/
│ │ │ │ └── __init__.py
│ │ │ ├── id_40966/
│ │ │ │ └── __init__.py
│ │ │ ├── id_42585/
│ │ │ │ └── __init__.py
│ │ │ ├── id_561/
│ │ │ │ └── __init__.py
│ │ │ ├── id_61/
│ │ │ │ └── __init__.py
│ │ │ └── id_62/
│ │ │ └── __init__.py
│ │ ├── svmlight_classification.txt
│ │ ├── svmlight_invalid.txt
│ │ ├── svmlight_invalid_order.txt
│ │ └── svmlight_multilabel.txt
│ ├── test_20news.py
│ ├── test_base.py
│ ├── test_california_housing.py
│ ├── test_common.py
│ ├── test_covtype.py
│ ├── test_kddcup99.py
│ ├── test_lfw.py
│ ├── test_olivetti_faces.py
│ ├── test_openml.py
│ ├── test_rcv1.py
│ ├── test_samples_generator.py
│ └── test_svmlight_format.py
├── decomposition/
│ ├── __init__.py
│ ├── _base.py
│ ├── _cdnmf_fast.pyx
│ ├── _dict_learning.py
│ ├── _factor_analysis.py
│ ├── _fastica.py
│ ├── _incremental_pca.py
│ ├── _kernel_pca.py
│ ├── _lda.py
│ ├── _nmf.py
│ ├── _online_lda_fast.pyx
│ ├── _pca.py
│ ├── _sparse_pca.py
│ ├── _truncated_svd.py
│ ├── setup.py
│ └── tests/
│ ├── __init__.py
│ ├── test_dict_learning.py
│ ├── test_factor_analysis.py
│ ├── test_fastica.py
│ ├── test_incremental_pca.py
│ ├── test_kernel_pca.py
│ ├── test_nmf.py
│ ├── test_online_lda.py
│ ├── test_pca.py
│ ├── test_sparse_pca.py
│ └── test_truncated_svd.py
├── discriminant_analysis.py
├── dummy.py
├── ensemble/
│ ├── __init__.py
│ ├── _bagging.py
│ ├── _base.py
│ ├── _forest.py
│ ├── _gb.py
│ ├── _gb_losses.py
│ ├── _gradient_boosting.pyx
│ ├── _hist_gradient_boosting/
│ │ ├── __init__.py
│ │ ├── _binning.pyx
│ │ ├── _bitset.pxd
│ │ ├── _bitset.pyx
│ │ ├── _gradient_boosting.pyx
│ │ ├── _loss.pyx
│ │ ├── _predictor.pyx
│ │ ├── binning.py
│ │ ├── common.pxd
│ │ ├── common.pyx
│ │ ├── gradient_boosting.py
│ │ ├── grower.py
│ │ ├── histogram.pyx
│ │ ├── loss.py
│ │ ├── predictor.py
│ │ ├── splitting.pyx
│ │ ├── tests/
│ │ │ ├── __init__.py
│ │ │ ├── test_binning.py
│ │ │ ├── test_bitset.py
│ │ │ ├── test_compare_lightgbm.py
│ │ │ ├── test_gradient_boosting.py
│ │ │ ├── test_grower.py
│ │ │ ├── test_histogram.py
│ │ │ ├── test_loss.py
│ │ │ ├── test_monotonic_contraints.py
│ │ │ ├── test_predictor.py
│ │ │ ├── test_splitting.py
│ │ │ └── test_warm_start.py
│ │ └── utils.pyx
│ ├── _iforest.py
│ ├── _stacking.py
│ ├── _voting.py
│ ├── _weight_boosting.py
│ ├── setup.py
│ └── tests/
│ ├── __init__.py
│ ├── test_bagging.py
│ ├── test_base.py
│ ├── test_common.py
│ ├── test_forest.py
│ ├── test_gradient_boosting.py
│ ├── test_gradient_boosting_loss_functions.py
│ ├── test_iforest.py
│ ├── test_stacking.py
│ ├── test_voting.py
│ └── test_weight_boosting.py
├── exceptions.py
├── experimental/
│ ├── __init__.py
│ ├── enable_halving_search_cv.py
│ ├── enable_hist_gradient_boosting.py
│ ├── enable_iterative_imputer.py
│ └── tests/
│ ├── __init__.py
│ ├── test_enable_hist_gradient_boosting.py
│ ├── test_enable_iterative_imputer.py
│ └── test_enable_successive_halving.py
├── externals/
│ ├── README
│ ├── __init__.py
│ ├── _arff.py
│ ├── _lobpcg.py
│ ├── _packaging/
│ │ ├── __init__.py
│ │ ├── _structures.py
│ │ └── version.py
│ ├── _pilutil.py
│ └── conftest.py
├── feature_extraction/
│ ├── __init__.py
│ ├── _dict_vectorizer.py
│ ├── _hash.py
│ ├── _hashing_fast.pyx
│ ├── _stop_words.py
│ ├── image.py
│ ├── setup.py
│ ├── tests/
│ │ ├── __init__.py
│ │ ├── test_dict_vectorizer.py
│ │ ├── test_feature_hasher.py
│ │ ├── test_image.py
│ │ └── test_text.py
│ └── text.py
├── feature_selection/
│ ├── __init__.py
│ ├── _base.py
│ ├── _from_model.py
│ ├── _mutual_info.py
│ ├── _rfe.py
│ ├── _sequential.py
│ ├── _univariate_selection.py
│ ├── _variance_threshold.py
│ └── tests/
│ ├── __init__.py
│ ├── test_base.py
│ ├── test_chi2.py
│ ├── test_feature_select.py
│ ├── test_from_model.py
│ ├── test_mutual_info.py
│ ├── test_rfe.py
│ ├── test_sequential.py
│ └── test_variance_threshold.py
├── gaussian_process/
│ ├── __init__.py
│ ├── _gpc.py
│ ├── _gpr.py
│ ├── kernels.py
│ └── tests/
│ ├── __init__.py
│ ├── _mini_sequence_kernel.py
│ ├── test_gpc.py
│ ├── test_gpr.py
│ └── test_kernels.py
├── impute/
│ ├── __init__.py
│ ├── _base.py
│ ├── _iterative.py
│ ├── _knn.py
│ └── tests/
│ ├── __init__.py
│ ├── test_base.py
│ ├── test_common.py
│ ├── test_impute.py
│ └── test_knn.py
├── inspection/
│ ├── __init__.py
│ ├── _partial_dependence.py
│ ├── _permutation_importance.py
│ ├── _plot/
│ │ ├── __init__.py
│ │ ├── partial_dependence.py
│ │ └── tests/
│ │ ├── __init__.py
│ │ └── test_plot_partial_dependence.py
│ ├── setup.py
│ └── tests/
│ ├── __init__.py
│ ├── test_partial_dependence.py
│ └── test_permutation_importance.py
├── isotonic.py
├── kernel_approximation.py
├── kernel_ridge.py
├── linear_model/
│ ├── __init__.py
│ ├── _base.py
│ ├── _bayes.py
│ ├── _cd_fast.pyx
│ ├── _coordinate_descent.py
│ ├── _glm/
│ │ ├── __init__.py
│ │ ├── glm.py
│ │ ├── link.py
│ │ └── tests/
│ │ ├── __init__.py
│ │ ├── test_glm.py
│ │ └── test_link.py
│ ├── _huber.py
│ ├── _least_angle.py
│ ├── _logistic.py
│ ├── _omp.py
│ ├── _passive_aggressive.py
│ ├── _perceptron.py
│ ├── _quantile.py
│ ├── _ransac.py
│ ├── _ridge.py
│ ├── _sag.py
│ ├── _sag_fast.pyx.tp
│ ├── _sgd_fast.pxd
│ ├── _sgd_fast.pyx
│ ├── _sgd_fast_helpers.h
│ ├── _stochastic_gradient.py
│ ├── _theil_sen.py
│ ├── setup.py
│ └── tests/
│ ├── __init__.py
│ ├── test_base.py
│ ├── test_bayes.py
│ ├── test_common.py
│ ├── test_coordinate_descent.py
│ ├── test_huber.py
│ ├── test_least_angle.py
│ ├── test_logistic.py
│ ├── test_omp.py
│ ├── test_passive_aggressive.py
│ ├── test_perceptron.py
│ ├── test_quantile.py
│ ├── test_ransac.py
│ ├── test_ridge.py
│ ├── test_sag.py
│ ├── test_sgd.py
│ ├── test_sparse_coordinate_descent.py
│ └── test_theil_sen.py
├── manifold/
│ ├── __init__.py
│ ├── _barnes_hut_tsne.pyx
│ ├── _isomap.py
│ ├── _locally_linear.py
│ ├── _mds.py
│ ├── _spectral_embedding.py
│ ├── _t_sne.py
│ ├── _utils.pyx
│ ├── setup.py
│ └── tests/
│ ├── __init__.py
│ ├── test_isomap.py
│ ├── test_locally_linear.py
│ ├── test_mds.py
│ ├── test_spectral_embedding.py
│ └── test_t_sne.py
├── metrics/
│ ├── __init__.py
│ ├── _base.py
│ ├── _classification.py
│ ├── _dist_metrics.pxd
│ ├── _dist_metrics.pyx
│ ├── _pairwise_fast.pyx
│ ├── _plot/
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── confusion_matrix.py
│ │ ├── det_curve.py
│ │ ├── precision_recall_curve.py
│ │ ├── roc_curve.py
│ │ └── tests/
│ │ ├── __init__.py
│ │ ├── test_base.py
│ │ ├── test_common_curve_display.py
│ │ ├── test_confusion_matrix_display.py
│ │ ├── test_det_curve_display.py
│ │ ├── test_plot_confusion_matrix.py
│ │ ├── test_plot_curve_common.py
│ │ ├── test_plot_det_curve.py
│ │ ├── test_plot_precision_recall.py
│ │ ├── test_plot_roc_curve.py
│ │ ├── test_precision_recall_display.py
│ │ └── test_roc_curve_display.py
│ ├── _ranking.py
│ ├── _regression.py
│ ├── _scorer.py
│ ├── cluster/
│ │ ├── __init__.py
│ │ ├── _bicluster.py
│ │ ├── _expected_mutual_info_fast.pyx
│ │ ├── _supervised.py
│ │ ├── _unsupervised.py
│ │ ├── setup.py
│ │ └── tests/
│ │ ├── __init__.py
│ │ ├── test_bicluster.py
│ │ ├── test_common.py
│ │ ├── test_supervised.py
│ │ └── test_unsupervised.py
│ ├── pairwise.py
│ ├── setup.py
│ └── tests/
│ ├── __init__.py
│ ├── test_classification.py
│ ├── test_common.py
│ ├── test_dist_metrics.py
│ ├── test_pairwise.py
│ ├── test_ranking.py
│ ├── test_regression.py
│ └── test_score_objects.py
├── mixture/
│ ├── __init__.py
│ ├── _base.py
│ ├── _bayesian_mixture.py
│ ├── _gaussian_mixture.py
│ └── tests/
│ ├── __init__.py
│ ├── test_bayesian_mixture.py
│ ├── test_gaussian_mixture.py
│ └── test_mixture.py
├── model_selection/
│ ├── __init__.py
│ ├── _search.py
│ ├── _search_successive_halving.py
│ ├── _split.py
│ ├── _validation.py
│ └── tests/
│ ├── __init__.py
│ ├── common.py
│ ├── test_search.py
│ ├── test_split.py
│ ├── test_successive_halving.py
│ └── test_validation.py
├── multiclass.py
├── multioutput.py
├── naive_bayes.py
├── neighbors/
│ ├── __init__.py
│ ├── _ball_tree.pyx
│ ├── _base.py
│ ├── _binary_tree.pxi
│ ├── _classification.py
│ ├── _distance_metric.py
│ ├── _graph.py
│ ├── _kd_tree.pyx
│ ├── _kde.py
│ ├── _lof.py
│ ├── _nca.py
│ ├── _nearest_centroid.py
│ ├── _partition_nodes.pxd
│ ├── _partition_nodes.pyx
│ ├── _quad_tree.pxd
│ ├── _quad_tree.pyx
│ ├── _regression.py
│ ├── _unsupervised.py
│ ├── setup.py
│ └── tests/
│ ├── __init__.py
│ ├── test_ball_tree.py
│ ├── test_graph.py
│ ├── test_kd_tree.py
│ ├── test_kde.py
│ ├── test_lof.py
│ ├── test_nca.py
│ ├── test_nearest_centroid.py
│ ├── test_neighbors.py
│ ├── test_neighbors_pipeline.py
│ ├── test_neighbors_tree.py
│ └── test_quad_tree.py
├── neural_network/
│ ├── __init__.py
│ ├── _base.py
│ ├── _multilayer_perceptron.py
│ ├── _rbm.py
│ ├── _stochastic_optimizers.py
│ └── tests/
│ ├── __init__.py
│ ├── test_base.py
│ ├── test_mlp.py
│ ├── test_rbm.py
│ └── test_stochastic_optimizers.py
├── pipeline.py
├── preprocessing/
│ ├── __init__.py
│ ├── _csr_polynomial_expansion.pyx
│ ├── _data.py
│ ├── _discretization.py
│ ├── _encoders.py
│ ├── _function_transformer.py
│ ├── _label.py
│ ├── _polynomial.py
│ ├── setup.py
│ └── tests/
│ ├── __init__.py
│ ├── test_common.py
│ ├── test_data.py
│ ├── test_discretization.py
│ ├── test_encoders.py
│ ├── test_function_transformer.py
│ ├── test_label.py
│ └── test_polynomial.py
├── random_projection.py
├── semi_supervised/
│ ├── __init__.py
│ ├── _label_propagation.py
│ ├── _self_training.py
│ └── tests/
│ ├── __init__.py
│ ├── test_label_propagation.py
│ └── test_self_training.py
├── setup.py
├── svm/
│ ├── __init__.py
│ ├── _base.py
│ ├── _bounds.py
│ ├── _classes.py
│ ├── _liblinear.pxi
│ ├── _liblinear.pyx
│ ├── _libsvm.pxi
│ ├── _libsvm.pyx
│ ├── _libsvm_sparse.pyx
│ ├── _newrand.pyx
│ ├── setup.py
│ ├── src/
│ │ ├── liblinear/
│ │ │ ├── COPYRIGHT
│ │ │ ├── _cython_blas_helpers.h
│ │ │ ├── liblinear_helper.c
│ │ │ ├── linear.cpp
│ │ │ ├── linear.h
│ │ │ ├── tron.cpp
│ │ │ └── tron.h
│ │ ├── libsvm/
│ │ │ ├── LIBSVM_CHANGES
│ │ │ ├── _svm_cython_blas_helpers.h
│ │ │ ├── libsvm_helper.c
│ │ │ ├── libsvm_sparse_helper.c
│ │ │ ├── libsvm_template.cpp
│ │ │ ├── svm.cpp
│ │ │ └── svm.h
│ │ └── newrand/
│ │ └── newrand.h
│ └── tests/
│ ├── __init__.py
│ ├── test_bounds.py
│ ├── test_sparse.py
│ └── test_svm.py
├── tests/
│ ├── __init__.py
│ ├── test_base.py
│ ├── test_build.py
│ ├── test_calibration.py
│ ├── test_check_build.py
│ ├── test_common.py
│ ├── test_config.py
│ ├── test_discriminant_analysis.py
│ ├── test_docstring_parameters.py
│ ├── test_dummy.py
│ ├── test_init.py
│ ├── test_isotonic.py
│ ├── test_kernel_approximation.py
│ ├── test_kernel_ridge.py
│ ├── test_metaestimators.py
│ ├── test_min_dependencies_readme.py
│ ├── test_multiclass.py
│ ├── test_multioutput.py
│ ├── test_naive_bayes.py
│ ├── test_pipeline.py
│ └── test_random_projection.py
├── tree/
│ ├── __init__.py
│ ├── _classes.py
│ ├── _criterion.pxd
│ ├── _criterion.pyx
│ ├── _export.py
│ ├── _reingold_tilford.py
│ ├── _splitter.pxd
│ ├── _splitter.pyx
│ ├── _tree.pxd
│ ├── _tree.pyx
│ ├── _utils.pxd
│ ├── _utils.pyx
│ ├── setup.py
│ └── tests/
│ ├── __init__.py
│ ├── test_export.py
│ ├── test_reingold_tilford.py
│ └── test_tree.py
└── utils/
├── __init__.py
├── _arpack.py
├── _cython_blas.pxd
├── _cython_blas.pyx
├── _encode.py
├── _estimator_html_repr.py
├── _fast_dict.pxd
├── _fast_dict.pyx
├── _joblib.py
├── _logistic_sigmoid.pyx
├── _mask.py
├── _mocking.py
├── _openmp_helpers.pyx
├── _pprint.py
├── _random.pxd
├── _random.pyx
├── _readonly_array_wrapper.pyx
├── _seq_dataset.pxd.tp
├── _seq_dataset.pyx.tp
├── _show_versions.py
├── _tags.py
├── _testing.py
├── _typedefs.pxd
├── _typedefs.pyx
├── _weight_vector.pxd.tp
├── _weight_vector.pyx.tp
├── arrayfuncs.pyx
├── class_weight.py
├── deprecation.py
├── estimator_checks.py
├── extmath.py
├── fixes.py
├── graph.py
├── metaestimators.py
├── multiclass.py
├── murmurhash.pxd
├── murmurhash.pyx
├── optimize.py
├── random.py
├── setup.py
├── sparsefuncs.py
├── sparsefuncs_fast.pyx
├── src/
│ ├── MurmurHash3.cpp
│ └── MurmurHash3.h
├── stats.py
├── tests/
│ ├── __init__.py
│ ├── conftest.py
│ ├── test_arpack.py
│ ├── test_arrayfuncs.py
│ ├── test_class_weight.py
│ ├── test_cython_blas.py
│ ├── test_cython_templating.py
│ ├── test_deprecation.py
│ ├── test_encode.py
│ ├── test_estimator_checks.py
│ ├── test_estimator_html_repr.py
│ ├── test_extmath.py
│ ├── test_fast_dict.py
│ ├── test_fixes.py
│ ├── test_graph.py
│ ├── test_metaestimators.py
│ ├── test_mocking.py
│ ├── test_multiclass.py
│ ├── test_murmurhash.py
│ ├── test_optimize.py
│ ├── test_parallel.py
│ ├── test_pprint.py
│ ├── test_random.py
│ ├── test_readonly_wrapper.py
│ ├── test_seq_dataset.py
│ ├── test_shortest_path.py
│ ├── test_show_versions.py
│ ├── test_sparsefuncs.py
│ ├── test_stats.py
│ ├── test_tags.py
│ ├── test_testing.py
│ ├── test_utils.py
│ ├── test_validation.py
│ └── test_weight_vector.py
└── validation.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .binder/postBuild
================================================
#!/bin/bash
set -e
# This script is called in a binder context. When this script is called, we are
# inside a git checkout of the scikit-learn/scikit-learn repo. This script is
# generating notebooks from the scikit-learn python examples.

# Safety guard: everything below assumes a disposable repo2docker container,
# because `find . -delete` wipes the current working directory. /.dockerenv is
# only present inside a docker container.
if [[ ! -f /.dockerenv ]]; then
    echo "This script was written for repo2docker and is supposed to run inside a docker container."
    echo "Exiting because this script can delete data if run outside of a docker container."
    exit 1
fi

# Back up content we need from the scikit-learn repo
TMP_CONTENT_DIR=/tmp/scikit-learn
mkdir -p $TMP_CONTENT_DIR
cp -r examples .binder $TMP_CONTENT_DIR
# delete everything in current directory including dot files and dot folders
find . -delete

# Generate notebooks and remove other files from examples folder.
# sphx_glr_python_to_jupyter.py (shipped by sphinx-gallery) converts each .py
# example into an .ipynb notebook next to it.
GENERATED_NOTEBOOKS_DIR=.generated-notebooks
cp -r $TMP_CONTENT_DIR/examples $GENERATED_NOTEBOOKS_DIR
find $GENERATED_NOTEBOOKS_DIR -name '*.py' -exec sphx_glr_python_to_jupyter.py '{}' +
# Drop every remaining non-notebook file (the original .py sources, data, READMEs).
NON_NOTEBOOKS=$(find $GENERATED_NOTEBOOKS_DIR -type f | grep -v '\.ipynb')
rm -f $NON_NOTEBOOKS

# Put the .binder folder back (may be useful for debugging purposes)
mv $TMP_CONTENT_DIR/.binder .
# Final clean up
rm -rf $TMP_CONTENT_DIR

# This is for compatibility with binder sphinx-gallery integration: this makes
# sure that the binder links generated by sphinx-gallery are correct even though
# the repo we use for binder (scikit-learn/scikit-learn) is not the repo of the
# generated doc (scikit-learn/scikit-learn.github.io)
mkdir notebooks
ln -s ../$GENERATED_NOTEBOOKS_DIR notebooks/auto_examples
================================================
FILE: .binder/requirements.txt
================================================
--extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple scikit-learn
--pre
matplotlib
scikit-image
pandas
sphinx-gallery
scikit-learn
================================================
FILE: .circleci/artifact_path
================================================
0/doc/_changed.html
================================================
FILE: .circleci/config.yml
================================================
version: 2.1
jobs:
doc-min-dependencies:
docker:
- image: circleci/python:3.7.7-buster
environment:
- OMP_NUM_THREADS: 2
- MKL_NUM_THREADS: 2
- CONDA_ENV_NAME: testenv
- PYTHON_VERSION: 3.7
- NUMPY_VERSION: 'min'
- SCIPY_VERSION: 'min'
- MATPLOTLIB_VERSION: 'min'
- CYTHON_VERSION: 'min'
- SCIKIT_IMAGE_VERSION: 'min'
- SPHINX_VERSION: 'min'
- PANDAS_VERSION: 'min'
- SPHINX_GALLERY_VERSION: 'min'
- NUMPYDOC_VERSION: 'min'
- SPHINX_PROMPT_VERSION: 'min'
- SPHINXEXT_OPENGRAPH_VERSION: 'min'
steps:
- checkout
- run: ./build_tools/circle/checkout_merge_commit.sh
- restore_cache:
key: v1-datasets-{{ .Branch }}
- restore_cache:
keys:
- doc-min-deps-ccache-{{ .Branch }}
- doc-min-deps-ccache
- run: ./build_tools/circle/build_doc.sh
- save_cache:
key: doc-min-deps-ccache-{{ .Branch }}-{{ .BuildNum }}
paths:
- ~/.ccache
- ~/.cache/pip
- save_cache:
key: v1-datasets-{{ .Branch }}
paths:
- ~/scikit_learn_data
- store_artifacts:
path: doc/_build/html/stable
destination: doc
- store_artifacts:
path: ~/log.txt
destination: log.txt
doc:
docker:
- image: circleci/python:3.7.7-buster
environment:
- OMP_NUM_THREADS: 2
- MKL_NUM_THREADS: 2
- CONDA_ENV_NAME: testenv
- PYTHON_VERSION: 3
- NUMPY_VERSION: 'latest'
- SCIPY_VERSION: 'latest'
- MATPLOTLIB_VERSION: 'latest'
- CYTHON_VERSION: 'latest'
- SCIKIT_IMAGE_VERSION: 'latest'
# Bump the sphinx version from time to time. Avoid latest sphinx version
# that tends to break things slightly too often
- SPHINX_VERSION: 4.2.0
- PANDAS_VERSION: 'latest'
- SPHINX_GALLERY_VERSION: 'latest'
- NUMPYDOC_VERSION: 'latest'
- SPHINX_PROMPT_VERSION: 'latest'
- SPHINXEXT_OPENGRAPH_VERSION: 'latest'
steps:
- checkout
- run: ./build_tools/circle/checkout_merge_commit.sh
- restore_cache:
key: v1-datasets-{{ .Branch }}
- restore_cache:
keys:
- doc-ccache-{{ .Branch }}
- doc-ccache
- run: ./build_tools/circle/build_doc.sh
- save_cache:
key: doc-ccache-{{ .Branch }}-{{ .BuildNum }}
paths:
- ~/.ccache
- ~/.cache/pip
- save_cache:
key: v1-datasets-{{ .Branch }}
paths:
- ~/scikit_learn_data
- store_artifacts:
path: doc/_build/html/stable
destination: doc
- store_artifacts:
path: ~/log.txt
destination: log.txt
# Persists generated documentation so that it can be attached and deployed
# in the 'deploy' step.
- persist_to_workspace:
root: doc/_build/html
paths: .
lint:
docker:
- image: circleci/python:3.7
steps:
- checkout
- run: ./build_tools/circle/checkout_merge_commit.sh
- run:
name: dependencies
command: sudo pip install flake8
- run:
name: linting
command: ./build_tools/circle/linting.sh
linux-arm64:
machine:
image: ubuntu-2004:202101-01
resource_class: arm.medium
environment:
# Use the latest supported version of python
- PYTHON_VERSION: '3.9'
- OMP_NUM_THREADS: 2
- OPENBLAS_NUM_THREADS: 2
- NUMPY_VERSION: 'latest'
- SCIPY_VERSION: 'latest'
- CYTHON_VERSION: 'latest'
- JOBLIB_VERSION: 'latest'
- THREADPOOLCTL_VERSION: 'latest'
- PYTEST_VERSION: 'latest'
- PYTEST_XDIST_VERSION: 'latest'
- TEST_DOCSTRINGS: 'true'
steps:
- checkout
- run: ./build_tools/circle/checkout_merge_commit.sh
- restore_cache:
key: linux-arm64-{{ .Branch }}
- run: ./build_tools/circle/build_test_arm.sh
- save_cache:
key: linux-arm64-{{ .Branch }}
paths:
- ~/.cache/ccache
- ~/.cache/pip
- ~/scikit_learn_data
# The source build folder.
- ~/project/build
deploy:
docker:
- image: circleci/python:3.7
steps:
- checkout
- run: ./build_tools/circle/checkout_merge_commit.sh
# Attach documentation generated in the 'doc' step so that it can be
# deployed.
- attach_workspace:
at: doc/_build/html
- run: ls -ltrh doc/_build/html/stable
- deploy:
command: |
if [[ "${CIRCLE_BRANCH}" =~ ^main$|^[0-9]+\.[0-9]+\.X$ ]]; then
bash build_tools/circle/push_doc.sh doc/_build/html/stable
fi
workflows:
version: 2
build-doc-and-deploy:
jobs:
- lint
- doc:
requires:
- lint
- doc-min-dependencies:
requires:
- lint
- deploy:
requires:
- doc
linux-arm64:
jobs:
- linux-arm64
================================================
FILE: .codecov.yml
================================================
comment: false
coverage:
status:
project:
default:
# Commits pushed to main should not make the overall
# project coverage decrease by more than 1%:
target: auto
threshold: 1%
patch:
default:
# Be tolerant on slight code coverage diff on PRs to limit
# noisy red coverage status on github PRs.
# Note: The coverage stats are still uploaded
# to codecov so that PR reviewers can see uncovered lines
target: auto
threshold: 1%
codecov:
notify:
# Prevent coverage status to upload multiple times for parallel and long
# running CI pipelines. This configuration is particularly useful on PRs
# to avoid confusion. Note that this value is set to the number of Azure
# Pipeline jobs uploading coverage reports.
after_n_builds: 6
ignore:
- "sklearn/externals"
- "sklearn/_build_utils"
- "**/setup.py"
================================================
FILE: .coveragerc
================================================
[run]
branch = True
source = sklearn
parallel = True
omit =
*/sklearn/externals/*
*/sklearn/_build_utils/*
*/benchmarks/*
**/setup.py
================================================
FILE: .git-blame-ignore-revs
================================================
# Since git version 2.23, git-blame has a feature to ignore
# certain commits.
#
# This file contains a list of commits that are not likely what
# you are looking for in `git blame`. You can set this file as
# a default ignore file for blame by running the following
# command.
#
# $ git config blame.ignoreRevsFile .git-blame-ignore-revs
# PR 18948: Migrate code style to Black
82df48934eba1df9a1ed3be98aaace8eada59e6e
# PR 20294: Use target_version >= 3.7 in Black
351ace7935a4ea685171cc6d174890f08facd561
# PR 20412: Use experimental_string_processing=true in Black
3ae7c7615343bbd36acece57825d8b0d70fd9da4
# PR 20502: Runs Black on examples
70a185ae59b4362633d18b0d0083abb1b6f7370c
================================================
FILE: .gitattributes
================================================
/doc/whats_new/v*.rst merge=union
================================================
FILE: .github/FUNDING.yml
================================================
# These are supported funding model platforms
github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
patreon: # Replace with a single Patreon username
open_collective: # Replace with a single Open Collective username
ko_fi: # Replace with a single Ko-fi username
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
otechie: # Replace with a single Otechie username
custom: ['https://numfocus.org/donate-to-scikit-learn']
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.yml
================================================
name: Bug Report
description: Create a report to help us reproduce and correct the bug
labels: ['Bug: triage']
body:
- type: markdown
attributes:
value: >
#### Before submitting a bug, please make sure the issue hasn't been already
addressed by searching through [the past issues](https://github.com/scikit-learn/scikit-learn/issues).
- type: textarea
attributes:
label: Describe the bug
description: >
A clear and concise description of what the bug is.
validations:
required: true
- type: textarea
attributes:
label: Steps/Code to Reproduce
description: |
Please add a minimal example that we can reproduce the error by running the code. Be as succinct as possible, do not depend on external data. In short, we are going to copy-paste your code and we expect to get the same result as you. Example:
```python
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
docs = ["Help I have a bug" for i in range(1000)]
vectorizer = CountVectorizer(input=docs, analyzer='word')
lda_features = vectorizer.fit_transform(docs)
lda_model = LatentDirichletAllocation(
n_topics=10,
learning_method='online',
evaluate_every=10,
n_jobs=4,
)
model = lda_model.fit(lda_features)
```
If the code is too long, feel free to put it in a public gist and link it in the issue: https://gist.github.com.
placeholder: |
```
Sample code to reproduce the problem
```
validations:
required: true
- type: textarea
attributes:
label: Expected Results
description: >
Please paste or describe the expected results.
placeholder: >
Example: No error is thrown.
validations:
required: true
- type: textarea
attributes:
label: Actual Results
description: >
Please paste or describe the results you observe instead of the expected results. If you observe an error, please paste the error message including the **full** traceback of the exception.
placeholder: >
Please paste or specifically describe the actual output or traceback.
validations:
required: true
- type: textarea
attributes:
label: Versions
description: |
Please run the following and paste the output below.
```python
import sklearn; sklearn.show_versions()
```
validations:
required: true
- type: markdown
attributes:
value: >
Thanks for contributing 🎉!
================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: true
contact_links:
- name: Discussions
url: https://github.com/scikit-learn/scikit-learn/discussions/new
about: Ask questions and discuss with other scikit-learn community members
- name: Stack Overflow
url: https://stackoverflow.com/questions/tagged/scikit-learn
about: Please ask and answer usage questions on Stack Overflow
- name: Mailing list
url: https://mail.python.org/mailman/listinfo/scikit-learn
about: General discussions and announcements on the mailing list
- name: Gitter
url: https://gitter.im/scikit-learn/scikit-learn
about: Users and developers can sometimes be found on the gitter channel
- name: Blank issue
url: https://github.com/scikit-learn/scikit-learn/issues/new
about: Please note that Github Discussions should be used in most cases instead
================================================
FILE: .github/ISSUE_TEMPLATE/doc_improvement.yml
================================================
name: Documentation improvement
description: Create a report to help us improve the documentation. Alternatively you can just open a pull request with the suggested change.
labels: [Documentation]
body:
- type: textarea
attributes:
label: Describe the issue linked to the documentation
description: >
Tell us about the confusion introduced in the documentation.
validations:
required: true
- type: textarea
attributes:
label: Suggest a potential alternative/fix
description: >
Tell us how we could improve the documentation in this regard.
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.yml
================================================
name: Feature request
description: Suggest a new algorithm, enhancement to an existing algorithm, etc.
labels: ['New Feature']
body:
- type: markdown
attributes:
value: >
#### If you want to propose a new algorithm, please refer first to the [scikit-learn inclusion criterion](https://scikit-learn.org/stable/faq.html#what-are-the-inclusion-criteria-for-new-algorithms).
- type: textarea
attributes:
label: Describe the workflow you want to enable
validations:
required: true
- type: textarea
attributes:
label: Describe your proposed solution
validations:
required: true
- type: textarea
attributes:
label: Describe alternatives you've considered, if relevant
- type: textarea
attributes:
label: Additional context
================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================
<!--
Thanks for contributing a pull request! Please ensure you have taken a look at
the contribution guidelines: https://github.com/scikit-learn/scikit-learn/blob/main/CONTRIBUTING.md
-->
#### Reference Issues/PRs
<!--
Example: Fixes #1234. See also #3456.
Please use keywords (e.g., Fixes) to create link to the issues or pull requests
you resolved, so that they will automatically be closed when your pull request
is merged. See https://github.com/blog/1506-closing-issues-via-pull-requests
-->
#### What does this implement/fix? Explain your changes.
#### Any other comments?
<!--
Please be aware that we are a loose team of volunteers so patience is
necessary; assistance handling other issues is very welcome. We value
all user contributions, no matter how minor they are. If we are slow to
review, either the pull request needs some benchmarking, tinkering,
convincing, etc. or more likely the reviewers are simply busy. In either
case, we ask for your understanding during the review process.
For more information, see our FAQ on this topic:
http://scikit-learn.org/dev/faq.html#why-is-my-pull-request-not-getting-any-attention.
Thanks for contributing!
-->
================================================
FILE: .github/labeler-file-extensions.yml
================================================
cython:
- sklearn/**/*.pyx
- sklearn/**/*.pxd
- sklearn/**/*.pxi
# Tempita templates
- sklearn/**/*.pyx.tp
- sklearn/**/*.pxd.tp
- sklearn/**/*.pxi.tp
================================================
FILE: .github/labeler-module.yml
================================================
module:cluster:
- sklearn/cluster/**/*
module:common:
- sklearn/common/**/*
module:compose:
- sklearn/compose/**/*
module:covariance:
- sklearn/covariance/**/*
module:cross_decomposition:
- sklearn/cross_decomposition/**/*
module:datasets:
- sklearn/datasets/**/*
module:decomposition:
- sklearn/decomposition/**/*
module:ensemble:
- sklearn/ensemble/**/*
module:feature_extraction:
- sklearn/feature_extraction/**/*
module:feature_selection:
- sklearn/feature_selection/**/*
module:gaussian_process:
- sklearn/gaussian_process/**/*
module:impute:
- sklearn/impute/**/*
module:inspection:
- sklearn/inspection/**/*
module:linear_model:
- sklearn/linear_model/**/*
module:manifold:
- sklearn/manifold/**/*
module:metrics:
- sklearn/metrics/**/*
module:mixture:
- sklearn/mixture/**/*
module:model_selection:
- sklearn/model_selection/**/*
module:naive_bayes:
- sklearn/naive_bayes.py
module:neighbors:
- sklearn/neighbors/**/*
module:neural_network:
- sklearn/neural_network/**/*
module:pipeline:
- sklearn/pipeline.py
module:preprocessing:
- sklearn/preprocessing/**/*
module:semi_supervised:
- sklearn/semi_supervised/**/*
module:svm:
- sklearn/svm/**/*
module:tree:
- sklearn/tree/**/*
module:utils:
- sklearn/utils/**/*
================================================
FILE: .github/scripts/label_title_regex.py
================================================
"""Labels PRs based on title. Must be run in a github action with the
pull_request_target event."""
from github import Github
import os
import json
import re

# Title patterns and the label each one should trigger.
TITLE_PATTERNS = [(r"\bDOC\b", "Documentation"), (r"\bCI\b", "Build / CI")]

# The action serializes the workflow context as JSON into CONTEXT_GITHUB.
gh_context = json.loads(os.getenv("CONTEXT_GITHUB"))

client = Github(gh_context["token"])
repository = client.get_repo(gh_context["repository"])
pull_request = repository.get_issue(number=gh_context["event"]["number"])
pr_title = pull_request.title

# Collect every label whose pattern matches the PR title.
matching_labels = []
for pattern, label in TITLE_PATTERNS:
    if re.search(pattern, pr_title):
        matching_labels.append(label)

if matching_labels:
    pull_request.add_to_labels(*matching_labels)
================================================
FILE: .github/workflows/assign.yml
================================================
name: Assign
on:
issue_comment:
types: created
jobs:
one:
runs-on: ubuntu-latest
if: >-
(github.event.comment.body == 'take' ||
github.event.comment.body == 'Take')
&& !github.event.issue.assignee
steps:
- run: |
echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}"
curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees
curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -X "DELETE" https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/labels/help%20wanted
================================================
FILE: .github/workflows/check-changelog.yml
================================================
name: Check Changelog
# This check makes sure that the changelog is properly updated
# when a PR introduces a change in a test file.
# To bypass this check, label the PR with "No Changelog Needed".
on:
  pull_request:
    types: [opened, edited, labeled, unlabeled, synchronize]

jobs:
  check:
    runs-on: ubuntu-latest
    # Skip the whole job when the bypass label is present on the PR.
    if: ${{ contains(github.event.pull_request.labels.*.name, 'No Changelog Needed') == 0 }}
    steps:
      - name: Get PR number and milestone
        run: |
          echo "PR_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV
          echo "TAGGED_MILESTONE=${{ github.event.pull_request.milestone.title }}" >> $GITHUB_ENV
      - uses: actions/checkout@v2
        with:
          # Full clone so that `git diff origin/main` below has the base branch.
          fetch-depth: '0'
      - name: Check the changelog
        run: |
          set -xe
          changed_files=$(git diff --name-only origin/main)
          # Changelog should be updated only if tests have been modified
          if [[ ! "$changed_files" =~ tests ]]
          then
            exit 0
          fi
          all_changelogs=$(cat ./doc/whats_new/v*.rst)
          if [[ "$all_changelogs" =~ :pr:\`$PR_NUMBER\` ]]
          then
            echo "Changelog has been updated."
            # If the pull request is milestoned check the correspondent changelog.
            # Bug fix: `exist` is not a shell command; use the `[ -f ... ]` test
            # builtin to check that the milestone's changelog file exists.
            if [ -f ./doc/whats_new/v${TAGGED_MILESTONE:0:4}.rst ]
            then
              expected_changelog=$(cat ./doc/whats_new/v${TAGGED_MILESTONE:0:4}.rst)
              if [[ "$expected_changelog" =~ :pr:\`$PR_NUMBER\` ]]
              then
                echo "Changelog and milestone correspond."
              else
                echo "Changelog and milestone do not correspond."
                echo "If you see this error make sure that the tagged milestone for the PR"
                echo "and the edited changelog filename properly match."
                exit 1
              fi
            fi
          else
            echo "A Changelog entry is missing."
            echo ""
            echo "Please add an entry to the changelog at 'doc/whats_new/v*.rst'"
            echo "to document your change assuming that the PR will be merged"
            echo "in time for the next release of scikit-learn."
            echo ""
            echo "Look at other entries in that file for inspiration and please"
            echo "reference this pull request using the ':pr:' directive and"
            echo "credit yourself (and other contributors if applicable) with"
            echo "the ':user:' directive."
            echo ""
            echo "If you see this error and there is already a changelog entry,"
            echo "check that the PR number is correct."
            echo ""
            # Bug fix: the missing space after `echo` made this line a
            # "command not found" error; also "does no" -> "does not".
            echo "If you believe that this PR does not warrant a changelog"
            echo "entry, say so in a comment so that a maintainer will label "
            echo "the PR with 'No Changelog Needed' to bypass this check."
            exit 1
          fi
================================================
FILE: .github/workflows/check-manifest.yml
================================================
name: "Check Manifest"
on:
schedule:
- cron: '0 0 * * *'
jobs:
check:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: '3.9'
- name: Install dependencies
# scipy and cython are required to build sdist
run: |
python -m pip install --upgrade pip
pip install check-manifest scipy cython
- run: |
check-manifest -v
================================================
FILE: .github/workflows/labeler-module.yml
================================================
name: "Pull Request Labeler"
on: pull_request_target
jobs:
triage:
runs-on: ubuntu-latest
steps:
- uses: thomasjpfan/labeler@v2.5.0
continue-on-error: true
if: github.repository == 'scikit-learn/scikit-learn'
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
max-labels: "3"
configuration-path: ".github/labeler-module.yml"
triage_file_extensions:
runs-on: ubuntu-latest
steps:
- uses: thomasjpfan/labeler@v2.5.0
continue-on-error: true
if: github.repository == 'scikit-learn/scikit-learn'
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
configuration-path: ".github/labeler-file-extensions.yml"
================================================
FILE: .github/workflows/labeler-title-regex.yml
================================================
name: Pull Request Regex Title Labeler
on:
pull_request_target:
types: [opened, edited]
permissions:
contents: read
pull-requests: write
jobs:
labeler:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: '3.9'
- name: Install PyGithub
run: pip install -Uq PyGithub
- name: Label pull request
run: python .github/scripts/label_title_regex.py
env:
CONTEXT_GITHUB: ${{ toJson(github) }}
================================================
FILE: .github/workflows/publish_pypi.yml
================================================
name: Publish to Pypi
on:
workflow_dispatch:
inputs:
version:
description: 'Version to upload to PyPI'
required: true
pypi_repo:
description: 'Repo to upload to (testpypi or pypi)'
default: 'testpypi'
required: true
jobs:
publish:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: '3.8'
- name: Install dependencies
run: |
pip install -U wheelhouse_uploader pyyaml
- name: Downloading wheels and sdist from staging
env:
SKLEARN_VERSION: ${{ github.event.inputs.version }}
run: |
echo "Download $SKLEARN_VERSION wheels and sdist"
python -m wheelhouse_uploader fetch \
--version $SKLEARN_VERSION \
--local-folder dist/ \
scikit-learn \
https://pypi.anaconda.org/scikit-learn-wheels-staging/simple/scikit-learn/
- name: Check dist has the correct number of artifacts
run: |
python build_tools/github/check_wheels.py
- name: Publish package to TestPyPI
uses: pypa/gh-action-pypi-publish@v1.4.1
with:
user: __token__
password: ${{ secrets.TEST_PYPI_TOKEN }}
repository_url: https://test.pypi.org/legacy/
if: ${{ github.event.inputs.pypi_repo == 'testpypi' }}
- name: Publish package to PyPI
uses: pypa/gh-action-pypi-publish@v1.4.1
with:
user: __token__
password: ${{ secrets.PYPI_TOKEN }}
if: ${{ github.event.inputs.pypi_repo == 'pypi' }}
================================================
FILE: .github/workflows/twitter.yml
================================================
# Tweet the URL of a commit on @sklearn_commits whenever a push event
# happens on the main branch
name: Twitter Push Notification
on:
push:
branches:
- main
jobs:
tweet:
name: Twitter Notification
runs-on: ubuntu-latest
steps:
- name: Tweet URL of last commit as @sklearn_commits
if: github.repository == 'scikit-learn/scikit-learn'
uses: docker://thomasjpfan/twitter-action:0.3
with:
args: "-message \"https://github.com/scikit-learn/scikit-learn/commit/${{ github.sha }}\""
env:
TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }}
TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }}
TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
TWITTER_ACCESS_SECRET: ${{ secrets.TWITTER_ACCESS_SECRET }}
================================================
FILE: .github/workflows/unassign.yml
================================================
name: Unassign
# Runs when a contributor unassigns themselves from an issue and adds the 'help wanted' label
on:
issues:
types: unassigned
jobs:
one:
runs-on: ubuntu-latest
steps:
- name: Add 'help wanted' label
if: github.event.issue.state == 'open'
run: |
echo "Marking issue ${{ github.event.issue.number }} as help wanted"
curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"labels": ["help wanted"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/labels
================================================
FILE: .github/workflows/wheels.yml
================================================
# Workflow to build and test wheels
name: Wheel builder
on:
schedule:
# Nightly build at 3:42 A.M.
- cron: "42 3 */1 * *"
push:
branches:
- main
# Release branches
- "[0-9]+.[0-9]+.X"
pull_request:
branches:
- main
- "[0-9]+.[0-9]+.X"
# Manual run
workflow_dispatch:
jobs:
# Check whether to build the wheels and the source tarball
check_build_trigger:
name: Check build trigger
runs-on: ubuntu-latest
if: github.repository == 'scikit-learn/scikit-learn'
outputs:
build: ${{ steps.check_build_trigger.outputs.build }}
steps:
- name: Checkout scikit-learn
uses: actions/checkout@v2
with:
ref: ${{ github.event.pull_request.head.sha }}
- id: check_build_trigger
name: Check build trigger
run: bash build_tools/github/check_build_trigger.sh
# Build the wheels for Linux, Windows and macOS for Python 3.7 and newer
build_wheels:
name: Build wheel for cp${{ matrix.python }}-${{ matrix.platform_id }}-${{ matrix.manylinux_image }}
runs-on: ${{ matrix.os }}
needs: check_build_trigger
if: needs.check_build_trigger.outputs.build
strategy:
# Ensure that a wheel builder finishes even if another fails
fail-fast: false
matrix:
os: [windows-latest, ubuntu-latest, macos-latest]
python: [37, 38, 39]
bitness: [32, 64]
manylinux_image: [manylinux1, manylinux2010]
include:
# Run 32 and 64 bit version in parallel for Linux and Windows
- os: windows-latest
bitness: 64
platform_id: win_amd64
- os: windows-latest
bitness: 32
platform_id: win32
- os: ubuntu-latest
bitness: 64
platform_id: manylinux_x86_64
- os: ubuntu-latest
bitness: 32
platform_id: manylinux_i686
- os: macos-latest
bitness: 64
platform_id: macosx_x86_64
exclude:
- os: macos-latest
bitness: 32
# Remove manylinux1 from the windows and osx build matrix since
# manylinux_image is not used for these platforms
- os: windows-latest
manylinux_image: manylinux1
- os: macos-latest
manylinux_image: manylinux1
steps:
- name: Checkout scikit-learn
uses: actions/checkout@v1
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: '3.9' # update once build dependencies are available
- name: Build and test wheels
env:
CONFTEST_PATH: ${{ github.workspace }}/conftest.py
CONFTEST_NAME: conftest.py
CIBW_ENVIRONMENT: OMP_NUM_THREADS=2
OPENBLAS_NUM_THREADS=2
SKLEARN_SKIP_NETWORK_TESTS=1
SKLEARN_BUILD_PARALLEL=3
MACOSX_DEPLOYMENT_TARGET=10.13
CIBW_BUILD: cp${{ matrix.python }}-${{ matrix.platform_id }}
CIBW_MANYLINUX_X86_64_IMAGE: ${{ matrix.manylinux_image }}
CIBW_MANYLINUX_I686_IMAGE: ${{ matrix.manylinux_image }}
CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: bash build_tools/github/repair_windows_wheels.sh {wheel} {dest_dir} ${{ matrix.bitness }}
CIBW_BEFORE_TEST_WINDOWS: bash build_tools/github/build_minimal_windows_image.sh ${{ matrix.python }} ${{ matrix.bitness }}
CIBW_TEST_REQUIRES: pytest pandas threadpoolctl
CIBW_TEST_COMMAND: bash {project}/build_tools/github/test_wheels.sh
CIBW_TEST_COMMAND_WINDOWS: bash {project}/build_tools/github/test_windows_wheels.sh ${{ matrix.python }} ${{ matrix.bitness }}
CIBW_BUILD_VERBOSITY: 1
run: bash build_tools/github/build_wheels.sh
- name: Store artifacts
uses: actions/upload-artifact@v2
with:
path: wheelhouse/*.whl
# Build the source distribution under Linux
build_sdist:
name: Source distribution
runs-on: ubuntu-latest
needs: check_build_trigger
if: needs.check_build_trigger.outputs.build
steps:
- name: Checkout scikit-learn
uses: actions/checkout@v1
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: '3.9' # update once build dependencies are available
- name: Build source distribution
run: bash build_tools/github/build_source.sh
env:
SKLEARN_BUILD_PARALLEL: 3
- name: Test source distribution
run: bash build_tools/github/test_source.sh
env:
OMP_NUM_THREADS: 2
OPENBLAS_NUM_THREADS: 2
SKLEARN_SKIP_NETWORK_TESTS: 1
- name: Store artifacts
uses: actions/upload-artifact@v2
with:
path: dist/*.tar.gz
# Upload the wheels and the source distribution
upload_anaconda:
name: Upload to Anaconda
runs-on: ubuntu-latest
needs: [build_wheels, build_sdist]
# The artifacts cannot be uploaded on PRs
if: github.event_name != 'pull_request'
steps:
- name: Checkout scikit-learn
uses: actions/checkout@v1
- name: Download artifacts
uses: actions/download-artifact@v2
with:
path: dist
- name: Setup Python
uses: actions/setup-python@v2
- name: Upload artifacts
env:
# Secret variables need to be mapped to environment variables explicitly
SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN: ${{ secrets.SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN }}
SCIKIT_LEARN_STAGING_UPLOAD_TOKEN: ${{ secrets.SCIKIT_LEARN_STAGING_UPLOAD_TOKEN }}
# Force a replacement if the remote file already exists
run: bash build_tools/github/upload_anaconda.sh
================================================
FILE: .gitignore
================================================
*.pyc
*.so
*.pyd
*~
.#*
*.lprof
*.swp
*.swo
.DS_Store
build
sklearn/datasets/__config__.py
sklearn/**/*.html
dist/
MANIFEST
doc/_build/
doc/auto_examples/
doc/modules/generated/
doc/datasets/generated/
doc/min_dependency_table.rst
doc/min_dependency_substitutions.rst
*.pdf
pip-log.txt
scikit_learn.egg-info/
.coverage
coverage
*.py,cover
.tags*
tags
covtype.data.gz
20news-18828/
20news-18828.tar.gz
coverages.zip
samples.zip
doc/coverages.zip
doc/samples.zip
coverages
samples
doc/coverages
doc/samples
*.prof
.tox/
pip-wheel-metadata
lfw_preprocessed/
nips2010_pdf/
*.nt.bz2
*.tar.gz
*.tgz
examples/cluster/joblib
reuters/
benchmarks/bench_covertype_data/
*.prefs
.pydevproject
.idea
.vscode
*.c
*.cpp
!/**/src/**/*.c
!/**/src/**/*.cpp
*.sln
*.pyproj
# Used by py.test
.cache
.pytest_cache/
_configtest.o.d
# Used by mypy
.mypy_cache/
# files generated from a template
sklearn/utils/_seq_dataset.pyx
sklearn/utils/_seq_dataset.pxd
sklearn/utils/_weight_vector.pyx
sklearn/utils/_weight_vector.pxd
sklearn/linear_model/_sag_fast.pyx
================================================
FILE: .mailmap
================================================
Alexandre Gramfort <alexandre.gramfort@inria.fr> <alexandre.gramfort@gmail.com>
Alexandre Gramfort <alexandre.gramfort@inria.fr> <alexandre.gramfort@m4x.org>
Alexandre Gramfort <alexandre.gramfort@inria.fr> <gramfort@localhost.(none)>
Alexandre Saint <snt.alex@gmail.com>
Andreas Mueller <amueller@ais.uni-bonn.de>
Andreas Mueller <amueller@ais.uni-bonn.de> <Andreas Mueller@MSRC-3645211.europe.corp.microsoft.com>
Andreas Mueller <amueller@ais.uni-bonn.de> <amueller@ais.uni-bonn.de>
Andreas Mueller <amueller@ais.uni-bonn.de> <amueller@templateimage.ista.local>
Andreas Mueller <amueller@ais.uni-bonn.de> <andy@marvin>
Andreas Mueller <amueller@ais.uni-bonn.de> <t3kcit@gmail.com>
Arnaud Joly <a.joly@ulg.ac.be>
Arnaud Joly <a.joly@ulg.ac.be> <arnaud.joly@yahoo.com>
Arnaud Joly <a.joly@ulg.ac.be> <arnaud.v.joly@gmail.com>
Anne-Laure Fouque <afouque@is208050.(none)> <af216607@is206635.intra.cea.fr>
Ariel Rokem <arokem@berkeley.edu> arokem <arokem@berkeley.edu>
Bala Subrahmanyam Varanasi <balu@agiliq.com>
Bertrand Thirion <bertrand.thirion@inria.fr>
Brandyn A. White <bwhite@dappervision.com>
Brian Cheung <bcheung5@gmail.com> <bcheung@rocky.rfmh.org>
Brian Cheung <bcheung5@gmail.com> <briancheung>
Brian Cheung <bcheung5@gmail.com> <cow@rusty.(none)>
Brian Holt <bh00038@cvplws63.eps.surrey.ac.uk> <bdholt1@gmail.com>
Christian Osendorfer <osendorf@gmail.com>
Clay Woolam <clay@woolam.org>
Danny Sullivan <dsullivan7@hotmail.com> <dbsullivan23@gmail.com>
Denis Engemann <denis-alexander.engemann@inria.fr>
Denis Engemann <denis-alexander.engemann@inria.fr> <denis.engemann@gmail.com>
Denis Engemann <denis-alexander.engemann@inria.fr> <dengemann@Deniss-MacBook-Pro.local>
Denis Engemann <denis-alexander.engemann@inria.fr> dengemann <denis.engemann@gmail.com>
Diego Molla <dmollaaliod@gmail.com> <diego@diego-desktop.(none)>
DraXus <draxus@gmail.com> draxus <draxus@hammer.ugr>
Edouard DUCHESNAY <ed203246@is206877.intra.cea.fr> <duchesnay@is143433.(none)>
Edouard DUCHESNAY <ed203246@is206877.intra.cea.fr> <edouard.duchesnay@gmail.com>
Edouard DUCHESNAY <ed203246@is206877.intra.cea.fr> <edouard@is2206219.(none)>
Emmanuelle Gouillart <emmanuelle.gouillart@nsup.org>
Emmanuelle Gouillart <emmanuelle.gouillart@nsup.org> <emma@aleph.(none)>
Eustache Diemert <eustache@diemert.fr>
Fabian Pedregosa <fabian.pedregosa@inria.fr>
Fabian Pedregosa <fabian.pedregosa@inria.fr> <fabian@fseoane.net>
Fabian Pedregosa <fabian.pedregosa@inria.fr> <f@bianp.net>
Federico Vaggi <vaggi.federico@gmail.com>
Federico Vaggi <vaggi.federico@gmail.com> <vaggi.federico@GMAIL.COM>
Gael Varoquaux <gael.varoquaux@inria.fr>
Gael Varoquaux <gael.varoquaux@inria.fr> <gael.varoquaux@normalesup.org>
Gael Varoquaux <gael.varoquaux@inria.fr> <varoquau@normalesup.org>
Giorgio Patrini <giorgio.patrini@nicta.com.au>
Giorgio Patrini <giorgio.patrini@nicta.com.au> <giorgiop@users.noreply.github.com>
Gilles Louppe <g.louppe@gmail.com> <g.louppe@ulg.ac.be>
Hamzeh Alsalhi <93hamsal@gmail.com>
Harikrishnan S <hihari777@gmail.com>
Hendrik Heuer <hendrikheuer@gmail.com>
Henry Lin <hlin117@gmail.com>
Hrishikesh Huilgolkar <hrishikesh911@gmail.com> <hrishikesh@QE-IND-WKS007.(none)>
Hugo Bowne-Anderson <hugobowne@gmail.com>
Imaculate <imaculatemosha@yahoo.com>
Immanuel Bayer <mane.desk@gmail.com>
Jacob Schreiber <jmschreiber91@gmail.com>
Jacob Schreiber <jmschreiber91@gmail.com> <jmschr@cs.washington.edu>
Jake VanderPlas <vanderplas@astro.washington.edu> <jakevdp@yahoo.com>
Jake VanderPlas <vanderplas@astro.washington.edu> <jakevdp@gmail.com>
Jake VanderPlas <vanderplas@astro.washington.edu> <vanderplas@astro.washington.edu>
James Bergstra <james.bergstra@gmail.com>
Jaques Grobler <jaques.grobler@inria.fr> <jaquesgrobler@gmail.com>
Jan Schlüter <scikit-learn@jan-schlueter.de>
Jean Kossaifi <jean.kossaifi@gmail.com>
Jean Kossaifi <jean.kossaifi@gmail.com> <jkossaifi@is208616.intra.cea.fr>
Jean Kossaifi <jean.kossaifi@gmail.com> <kossaifi@is208616.intra.cea.fr>
Joel Nothman <joel.nothman@gmail.com> <jnothman@student.usyd.edu.au>
Kyle Kastner <kastnerkyle@gmail.com>
Lars Buitinck <L.J.Buitinck@uva.nl> <Lars@.(none)>
Lars Buitinck <L.J.Buitinck@uva.nl> <l.j.buitinck@uva.nl>
Lars Buitinck <L.J.Buitinck@uva.nl> <larsmans@gmail.com>
Lars Buitinck <L.J.Buitinck@uva.nl> <larsmans@users.noreply.github.com>
Lars Buitinck <L.J.Buitinck@uva.nl> <l.buitinck@esciencecenter.nl>
Loic Esteve <loic.esteve@ymail.com>
Manoj Kumar <manojkumarsivaraj334@gmail.com>
Matthieu Perrot <matthieu.perrot@cea.fr> <revilyo@earth.(none)>
Maheshakya Wijewardena <maheshakya@wso2.com>
Michael Bommarito <michael@bommaritollc.com>
Michael Eickenberg <michael.eickenberg@gmail.com>
Michael Eickenberg <michael.eickenberg@gmail.com> <me232320@is146139.intra.cea.fr>
Samuel Charron <samuel.charron@data-publica.com> <samuel.charron@gmail.com>
Sergio Medina <sergio.medina@inria.fr> <smedina@work4labs.com>
Nelle Varoquaux <nelle.varoquaux@gmail.com>
Nelle Varoquaux <nelle.varoquaux@gmail.com> <nelle@phgroup.com>
Nelle Varoquaux <nelle.varoquaux@gmail.com> <nelle@varoquaux@gmail.com>
Nicolas Goix <goix.nicolas@gmail.com>
Nicolas Pinto <pinto@alum.mit.edu> <pinto@mit.edu>
Noel Dawe <Noel.Dawe@cern.ch> <noel.dawe@gmail.com>
Noel Dawe <Noel.Dawe@cern.ch> <noel.dAwe@cern.ch>
Olivier Grisel <olivier.grisel@ensta.org> <ogrisel@turingcarpet.(none)>
Olivier Grisel <olivier.grisel@ensta.org> <olivier.grisel@ensta.org>
Olivier Hervieu <olivier.hervieu@gmail.com> <olivier.hervieu@tinyclues.com>
Paul Butler <paulgb@gmail.com>
Peter Prettenhofer <peter.prettenhofer@gmail.com>
Raghav RV <rvraghav93@gmail.com>
Raghav RV <rvraghav93@gmail.com> <ragvrv@gmail.com>
Robert Layton <robertlayton@gmail.com>
Roman Sinayev <roman.sinayev@gmail.com>
Roman Sinayev <roman.sinayev@gmail.com> <roman@y570.(none)>
Ronald Phlypo <Ronald.Phlypo@inria.fr>
Satrajit Ghosh <satra@mit.edu> <satrajit.ghosh@gmail.com>
Sebastian Raschka <se.raschka@me.com>
Sebastian Raschka <mail@sebastianraschka.com> <se.raschka@me.com>
Shiqiao Du <lucidfrontier.45@gmail.com>
Shiqiao Du <lucidfrontier.45@gmail.com> <s.du@freebit.net>
Thomas Unterthiner <thomas.unterthiner@gmx.net>
Tim Sheerman-Chase <t.sheerman-chase@surrey.ac.uk> <ts00051@ts00051-desktop.(none)>
Vincent Dubourg <vincent.dubourg@gmail.com>
Vincent Dubourg <vincent.dubourg@gmail.com> <dubourg@PTlami14.(none)>
Vincent Michel <vincent.michel@inria.fr> <vincent.michel@logilab.fr>
Vincent Michel <vincent.michel@inria.fr> <vincent@axon.(none)>
Vincent Michel <vincent.michel@inria.fr> <vincent@vincent.org>
Vincent Michel <vincent.michel@inria.fr> <vm.michel@gmail.com>
Vincent Michel <vincent.michel@inria.fr> <vmic@crater2.logilab.fr>
Vincent Schut <schut@sarvision.nl> <vincent@TIMO.(none)>
Virgile Fritsch <virgile.fritsch@gmail.com>
Virgile Fritsch <virgile.fritsch@gmail.com> <virgile@virgile-Precision-M4400.(none)>
Vlad Niculae <vlad@vene.ro>
Wei Li <kuantkid@gmail.com>
Wei Li <kuantkid@gmail.com> <kuantkid+github@gmail.com>
X006 <x006@x006-icsl.(none)> <x006@x006laptop.(none)>
Xinfan Meng <mxf3306@gmail.com> <mxf@chomsky.localdomain>
Yannick Schwartz <yannick.schwartz@inria.fr> <yannick.schwartz@cea.fr>
Yannick Schwartz <yannick.schwartz@inria.fr> <ys218403@is220245.(none)>
Yannick Schwartz <yannick.schwartz@inria.fr> <yannick.schwartz@gmail.com>
================================================
FILE: .pre-commit-config.yaml
================================================
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.3.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/psf/black
rev: 21.6b0
hooks:
- id: black
- repo: https://gitlab.com/pycqa/flake8
rev: 3.9.2
hooks:
- id: flake8
types: [file, python]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.782
hooks:
- id: mypy
files: sklearn/
additional_dependencies: [pytest==6.2.4]
================================================
FILE: .travis.yml
================================================
# Make it explicit that we favor the
# new container-based Travis workers
language: python
dist: xenial
cache:
apt: true
directories:
- $HOME/.cache/pip
- $HOME/.ccache
env:
global:
- CPU_COUNT=3
- TEST_DIR=/tmp/sklearn # Test directory for continuous integration jobs
- PYTEST_VERSION=latest
- OMP_NUM_THREADS=2
- OPENBLAS_NUM_THREADS=2
- SKLEARN_BUILD_PARALLEL=3
- SKLEARN_SKIP_NETWORK_TESTS=1
- PYTHONUNBUFFERED=1
# Custom environment variables for the ARM wheel builder
- CIBW_BUILD_VERBOSITY=1
- CIBW_TEST_COMMAND="bash {project}/build_tools/travis/test_wheels.sh"
- CIBW_ENVIRONMENT="CPU_COUNT=2
OMP_NUM_THREADS=2
OPENBLAS_NUM_THREADS=2
SKLEARN_BUILD_PARALLEL=10
SKLEARN_SKIP_NETWORK_TESTS=1
PYTHONUNBUFFERED=1"
jobs:
include:
# Linux environments to build the scikit-learn wheels for the ARM64
# architecture and Python 3.7 and newer. This is used both at release time
# with the manual trigger in the commit message in the release branch and as
# a scheduled task to build the weekly dev build on the main branch. The
# weekly frequency is meant to avoid depleting the Travis CI credits too
# fast.
- python: 3.7
os: linux
arch: arm64-graviton2
dist: focal
virt: lxd
group: edge
if: type = cron or commit_message =~ /\[cd build\]/
env:
- BUILD_WHEEL=true
- CIBW_BUILD=cp37-manylinux_aarch64
- python: 3.8
os: linux
arch: arm64-graviton2
dist: focal
virt: lxd
group: edge
if: type = cron or commit_message =~ /\[cd build\]/
env:
- BUILD_WHEEL=true
- CIBW_BUILD=cp38-manylinux_aarch64
- python: 3.9
os: linux
arch: arm64-graviton2
dist: focal
virt: lxd
group: edge
if: type = cron or commit_message =~ /\[cd build\]/
env:
- BUILD_WHEEL=true
- CIBW_BUILD=cp39-manylinux_aarch64
install: source build_tools/travis/install.sh || travis_terminate 1
script: source build_tools/travis/script.sh || travis_terminate 1
after_success: source build_tools/travis/after_success.sh || travis_terminate 1
notifications:
webhooks:
urls:
- https://webhooks.gitter.im/e/4ffabb4df010b70cd624
on_success: change
on_failure: always
on_start: never
================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Code of Conduct
We are a community based on openness, as well as friendly and didactic discussions.
We aspire to treat everybody equally, and value their contributions.
Decisions are made based on technical merit and consensus.
Code is not the only way to help the project. Reviewing pull requests,
answering questions to help others on mailing lists or issues, organizing and
teaching tutorials, working on the website, improving the documentation, are
all priceless contributions.
We abide by the principles of openness, respect, and consideration of others of
the Python Software Foundation: https://www.python.org/psf/codeofconduct/
================================================
FILE: CONTRIBUTING.md
================================================
Contributing to scikit-learn
============================
The latest contributing guide is available in the repository at
`doc/developers/contributing.rst`, or online at:
https://scikit-learn.org/dev/developers/contributing.html
There are many ways to contribute to scikit-learn, with the most common ones
being contribution of code or documentation to the project. Improving the
documentation is no less important than improving the library itself. If you
find a typo in the documentation, or have made improvements, do not hesitate to
send an email to the mailing list or preferably submit a GitHub pull request.
Documentation can be found under the
[doc/](https://github.com/scikit-learn/scikit-learn/tree/main/doc) directory.
But there are many other ways to help. In particular answering queries on the
[issue tracker](https://github.com/scikit-learn/scikit-learn/issues),
investigating bugs, and [reviewing other developers' pull
requests](http://scikit-learn.org/dev/developers/contributing.html#code-review-guidelines)
are very valuable contributions that decrease the burden on the project
maintainers.
Another way to contribute is to report issues you're facing, and give a "thumbs
up" on issues that others reported and that are relevant to you. It also helps
us if you spread the word: reference the project from your blog and articles,
link to it from your website, or simply star it in GitHub to say "I use it".
Quick links
-----------
* [Submitting a bug report or feature request](http://scikit-learn.org/dev/developers/contributing.html#submitting-a-bug-report-or-a-feature-request)
* [Contributing code](http://scikit-learn.org/dev/developers/contributing.html#contributing-code)
* [Coding guidelines](https://scikit-learn.org/dev/developers/develop.html#coding-guidelines)
* [Tips to read current code](https://scikit-learn.org/dev/developers/contributing.html#reading-the-existing-code-base)
Code of Conduct
---------------
We abide by the principles of openness, respect, and consideration of others
of the Python Software Foundation: https://www.python.org/psf/codeofconduct/.
================================================
FILE: COPYING
================================================
BSD 3-Clause License
Copyright (c) 2007-2021 The scikit-learn developers.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: MANIFEST.in
================================================
include *.rst
recursive-include doc *
recursive-include examples *
recursive-include sklearn *.c *.h *.pyx *.pxd *.pxi *.tp
recursive-include sklearn/datasets *.csv *.csv.gz *.rst *.jpg *.txt *.arff.gz *.json.gz
include COPYING
include README.rst
include pyproject.toml
include sklearn/externals/README
include sklearn/svm/src/liblinear/COPYRIGHT
include sklearn/svm/src/libsvm/LIBSVM_CHANGES
include conftest.py
include Makefile
include MANIFEST.in
include .coveragerc
# exclude from sdist
recursive-exclude asv_benchmarks *
recursive-exclude benchmarks *
recursive-exclude build_tools *
recursive-exclude maint_tools *
recursive-exclude .binder *
recursive-exclude .circleci *
exclude .codecov.yml
exclude .git-blame-ignore-revs
exclude .mailmap
exclude .pre-commit-config.yaml
exclude azure-pipelines.yml
exclude lgtm.yml
exclude CODE_OF_CONDUCT.md
exclude CONTRIBUTING.md
exclude PULL_REQUEST_TEMPLATE.md
================================================
FILE: Makefile
================================================
# simple makefile to simplify repetitive build env management tasks under posix
# caution: testing won't work on windows, see README
PYTHON ?= python
CYTHON ?= cython
PYTEST ?= pytest
CTAGS ?= ctags
# skip doctests on 32bit python
BITS := $(shell python -c 'import struct; print(8 * struct.calcsize("P"))')
all: clean inplace test
clean-ctags:
rm -f tags
clean: clean-ctags
$(PYTHON) setup.py clean
rm -rf dist
in: inplace # just a shortcut
inplace:
$(PYTHON) setup.py build_ext -i
test-code: in
$(PYTEST) --showlocals -v sklearn --durations=20
test-sphinxext:
$(PYTEST) --showlocals -v doc/sphinxext/
test-doc:
ifeq ($(BITS),64)
$(PYTEST) $(shell find doc -name '*.rst' | sort)
endif
test-code-parallel: in
$(PYTEST) -n auto --showlocals -v sklearn --durations=20
test-coverage:
rm -rf coverage .coverage
$(PYTEST) sklearn --showlocals -v --cov=sklearn --cov-report=html:coverage
test-coverage-parallel:
rm -rf coverage .coverage .coverage.*
$(PYTEST) sklearn -n auto --showlocals -v --cov=sklearn --cov-report=html:coverage
test: test-code test-sphinxext test-doc
trailing-spaces:
find sklearn -name "*.py" -exec perl -pi -e 's/[ \t]*$$//' {} \;
cython:
python setup.py build_src
ctags:
# make tags for symbol based navigation in emacs and vim
# Install with: sudo apt-get install exuberant-ctags
$(CTAGS) --python-kinds=-i -R sklearn
doc: inplace
$(MAKE) -C doc html
doc-noplot: inplace
$(MAKE) -C doc html-noplot
code-analysis:
flake8 sklearn | grep -v __init__ | grep -v external
pylint -E -i y sklearn/ -d E1103,E0611,E1101
flake8-diff:
git diff upstream/main -u -- "*.py" | flake8 --diff
================================================
FILE: README.rst
================================================
.. -*- mode: rst -*-
|Azure|_ |Travis|_ |Codecov|_ |CircleCI|_ |Nightly wheels|_ |Black|_ |PythonVersion|_ |PyPi|_ |DOI|_
.. |Azure| image:: https://dev.azure.com/scikit-learn/scikit-learn/_apis/build/status/scikit-learn.scikit-learn?branchName=main
.. _Azure: https://dev.azure.com/scikit-learn/scikit-learn/_build/latest?definitionId=1&branchName=main
.. |CircleCI| image:: https://circleci.com/gh/scikit-learn/scikit-learn/tree/main.svg?style=shield&circle-token=:circle-token
.. _CircleCI: https://circleci.com/gh/scikit-learn/scikit-learn
.. |Travis| image:: https://api.travis-ci.com/scikit-learn/scikit-learn.svg?branch=main
.. _Travis: https://app.travis-ci.com/github/scikit-learn/scikit-learn
.. |Codecov| image:: https://codecov.io/gh/scikit-learn/scikit-learn/branch/main/graph/badge.svg?token=Pk8G9gg3y9
.. _Codecov: https://codecov.io/gh/scikit-learn/scikit-learn
.. |Nightly wheels| image:: https://github.com/scikit-learn/scikit-learn/workflows/Wheel%20builder/badge.svg?event=schedule
.. _`Nightly wheels`: https://github.com/scikit-learn/scikit-learn/actions?query=workflow%3A%22Wheel+builder%22+event%3Aschedule
.. |PythonVersion| image:: https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9-blue
.. _PythonVersion: https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9-blue
.. |PyPi| image:: https://img.shields.io/pypi/v/scikit-learn
.. _PyPi: https://pypi.org/project/scikit-learn
.. |Black| image:: https://img.shields.io/badge/code%20style-black-000000.svg
.. _Black: https://github.com/psf/black
.. |DOI| image:: https://zenodo.org/badge/21369/scikit-learn/scikit-learn.svg
.. _DOI: https://zenodo.org/badge/latestdoi/21369/scikit-learn/scikit-learn
.. |PythonMinVersion| replace:: 3.7
.. |NumPyMinVersion| replace:: 1.14.6
.. |SciPyMinVersion| replace:: 1.1.0
.. |JoblibMinVersion| replace:: 0.11
.. |ThreadpoolctlMinVersion| replace:: 2.0.0
.. |MatplotlibMinVersion| replace:: 2.2.3
.. |Scikit-ImageMinVersion| replace:: 0.14.5
.. |PandasMinVersion| replace:: 0.25.0
.. |SeabornMinVersion| replace:: 0.9.0
.. |PytestMinVersion| replace:: 5.0.1
.. image:: https://raw.githubusercontent.com/scikit-learn/scikit-learn/main/doc/logos/scikit-learn-logo.png
:target: https://scikit-learn.org/
**scikit-learn** is a Python module for machine learning built on top of
SciPy and is distributed under the 3-Clause BSD license.
The project was started in 2007 by David Cournapeau as a Google Summer
of Code project, and since then many volunteers have contributed. See
the `About us <https://scikit-learn.org/dev/about.html#authors>`__ page
for a list of core contributors.
It is currently maintained by a team of volunteers.
Website: https://scikit-learn.org
Installation
------------
Dependencies
~~~~~~~~~~~~
scikit-learn requires:
- Python (>= |PythonMinVersion|)
- NumPy (>= |NumPyMinVersion|)
- SciPy (>= |SciPyMinVersion|)
- joblib (>= |JoblibMinVersion|)
- threadpoolctl (>= |ThreadpoolctlMinVersion|)
**Scikit-learn 0.20 was the last version to support Python 2.7 and Python 3.4.**
scikit-learn 0.23 and later require Python 3.6 or newer.
scikit-learn 1.0 and later require Python 3.7 or newer.
Scikit-learn plotting capabilities (i.e., functions start with ``plot_`` and
classes end with "Display") require Matplotlib (>= |MatplotlibMinVersion|).
For running the examples Matplotlib >= |MatplotlibMinVersion| is required.
A few examples require scikit-image >= |Scikit-ImageMinVersion|, a few examples
require pandas >= |PandasMinVersion|, some examples require seaborn >=
|SeabornMinVersion|.
User installation
~~~~~~~~~~~~~~~~~
If you already have a working installation of numpy and scipy,
the easiest way to install scikit-learn is using ``pip`` ::
pip install -U scikit-learn
or ``conda``::
conda install -c conda-forge scikit-learn
The documentation includes more detailed `installation instructions <https://scikit-learn.org/stable/install.html>`_.
Changelog
---------
See the `changelog <https://scikit-learn.org/dev/whats_new.html>`__
for a history of notable changes to scikit-learn.
Development
-----------
We welcome new contributors of all experience levels. The scikit-learn
community goals are to be helpful, welcoming, and effective. The
`Development Guide <https://scikit-learn.org/stable/developers/index.html>`_
has detailed information about contributing code, documentation, tests, and
more. We've included some basic information in this README.
Important links
~~~~~~~~~~~~~~~
- Official source code repo: https://github.com/scikit-learn/scikit-learn
- Download releases: https://pypi.org/project/scikit-learn/
- Issue tracker: https://github.com/scikit-learn/scikit-learn/issues
Source code
~~~~~~~~~~~
You can check the latest sources with the command::
git clone https://github.com/scikit-learn/scikit-learn.git
Contributing
~~~~~~~~~~~~
To learn more about making a contribution to scikit-learn, please see our
`Contributing guide
<https://scikit-learn.org/dev/developers/contributing.html>`_.
Testing
~~~~~~~
After installation, you can launch the test suite from outside the source
directory (you will need to have ``pytest`` >= |PytestMinVersion| installed)::
pytest sklearn
See the web page https://scikit-learn.org/dev/developers/advanced_installation.html#testing
for more information.
Random number generation can be controlled during testing by setting
the ``SKLEARN_SEED`` environment variable.
Submitting a Pull Request
~~~~~~~~~~~~~~~~~~~~~~~~~
Before opening a Pull Request, have a look at the
full Contributing page to make sure your code complies
with our guidelines: https://scikit-learn.org/stable/developers/index.html
Project History
---------------
The project was started in 2007 by David Cournapeau as a Google Summer
of Code project, and since then many volunteers have contributed. See
the `About us <https://scikit-learn.org/dev/about.html#authors>`__ page
for a list of core contributors.
The project is currently maintained by a team of volunteers.
**Note**: `scikit-learn` was previously referred to as `scikits.learn`.
Help and Support
----------------
Documentation
~~~~~~~~~~~~~
- HTML documentation (stable release): https://scikit-learn.org
- HTML documentation (development version): https://scikit-learn.org/dev/
- FAQ: https://scikit-learn.org/stable/faq.html
Communication
~~~~~~~~~~~~~
- Mailing list: https://mail.python.org/mailman/listinfo/scikit-learn
- Gitter: https://gitter.im/scikit-learn/scikit-learn
- Twitter: https://twitter.com/scikit_learn
- Stack Overflow: https://stackoverflow.com/questions/tagged/scikit-learn
- Github Discussions: https://github.com/scikit-learn/scikit-learn/discussions
- Website: https://scikit-learn.org
- LinkedIn: https://www.linkedin.com/company/scikit-learn
Citation
~~~~~~~~
If you use scikit-learn in a scientific publication, we would appreciate citations: https://scikit-learn.org/stable/about.html#citing-scikit-learn
================================================
FILE: SECURITY.md
================================================
# Security Policy
## Supported Versions
| Version | Supported |
| --------- | ------------------ |
| 1.0.1 | :white_check_mark: |
| < 1.0.1 | :x: |
## Reporting a Vulnerability
Please report security vulnerabilities by email to `security@scikit-learn.org`.
This email is an alias to a subset of the scikit-learn maintainers' team.
If the security vulnerability is accepted, a patch will be crafted privately
in order to prepare a dedicated bugfix release as timely as possible (depending
on the complexity of the fix).
================================================
FILE: asv_benchmarks/.gitignore
================================================
*__pycache__*
env/
html/
results/
scikit-learn/
benchmarks/cache/
================================================
FILE: asv_benchmarks/asv.conf.json
================================================
{
// The version of the config file format. Do not change, unless
// you know what you are doing.
"version": 1,
// The name of the project being benchmarked
"project": "scikit-learn",
// The project's homepage
"project_url": "scikit-learn.org/",
// The URL or local path of the source code repository for the
// project being benchmarked
"repo": "..",
// The Python project's subdirectory in your repo. If missing or
// the empty string, the project is assumed to be located at the root
// of the repository.
// "repo_subdir": "",
// Customizable commands for building, installing, and
// uninstalling the project. See asv.conf.json documentation.
//
// "install_command": ["python -mpip install {wheel_file}"],
// "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"],
// "build_command": [
// "python setup.py build",
// "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"
// ],
    // List of branches to benchmark. If not provided, defaults to "master"
// (for git) or "default" (for mercurial).
"branches": ["main"],
// "branches": ["default"], // for mercurial
// The DVCS being used. If not set, it will be automatically
// determined from "repo" by looking at the protocol in the URL
// (if remote), or by looking for special directories, such as
// ".git" (if local).
// "dvcs": "git",
// The tool to use to create environments. May be "conda",
// "virtualenv" or other value depending on the plugins in use.
// If missing or the empty string, the tool will be automatically
// determined by looking for tools on the PATH environment
// variable.
"environment_type": "conda",
// timeout in seconds for installing any dependencies in environment
// defaults to 10 min
//"install_timeout": 600,
// the base URL to show a commit for the project.
"show_commit_url": "https://github.com/scikit-learn/scikit-learn/commit/",
// The Pythons you'd like to test against. If not provided, defaults
// to the current version of Python used to run `asv`.
// "pythons": ["3.6"],
// The list of conda channel names to be searched for benchmark
// dependency packages in the specified order
// "conda_channels": ["conda-forge", "defaults"]
// The matrix of dependencies to test. Each key is the name of a
// package (in PyPI) and the values are version numbers. An empty
// list or empty string indicates to just test against the default
// (latest) version. null indicates that the package is to not be
// installed. If the package to be tested is only available from
    // PyPI, and the 'environment_type' is conda, then you can preface
// the package name by 'pip+', and the package will be installed via
// pip (with all the conda available packages installed first,
// followed by the pip installed packages).
//
"matrix": {
"numpy": [],
"scipy": [],
"cython": [],
"joblib": [],
"threadpoolctl": []
},
// Combinations of libraries/python versions can be excluded/included
// from the set to test. Each entry is a dictionary containing additional
// key-value pairs to include/exclude.
//
// An exclude entry excludes entries where all values match. The
// values are regexps that should match the whole string.
//
// An include entry adds an environment. Only the packages listed
// are installed. The 'python' key is required. The exclude rules
// do not apply to includes.
//
// In addition to package names, the following keys are available:
//
// - python
// Python version, as in the *pythons* variable above.
// - environment_type
// Environment type, as above.
// - sys_platform
// Platform, as in sys.platform. Possible values for the common
// cases: 'linux2', 'win32', 'cygwin', 'darwin'.
//
// "exclude": [
// {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
// {"environment_type": "conda", "six": null}, // don't run without six on conda
// ],
//
// "include": [
// // additional env for python2.7
// {"python": "2.7", "numpy": "1.8"},
// // additional env if run on windows+conda
// {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
// ],
// The directory (relative to the current directory) that benchmarks are
// stored in. If not provided, defaults to "benchmarks"
// "benchmark_dir": "benchmarks",
// The directory (relative to the current directory) to cache the Python
// environments in. If not provided, defaults to "env"
// "env_dir": "env",
// The directory (relative to the current directory) that raw benchmark
// results are stored in. If not provided, defaults to "results".
// "results_dir": "results",
// The directory (relative to the current directory) that the html tree
// should be written to. If not provided, defaults to "html".
// "html_dir": "html",
// The number of characters to retain in the commit hashes.
// "hash_length": 8,
// `asv` will cache results of the recent builds in each
// environment, making them faster to install next time. This is
// the number of builds to keep, per environment.
// "build_cache_size": 2,
// The commits after which the regression search in `asv publish`
// should start looking for regressions. Dictionary whose keys are
// regexps matching to benchmark names, and values corresponding to
// the commit (exclusive) after which to start looking for
// regressions. The default is to start from the first commit
// with results. If the commit is `null`, regression detection is
// skipped for the matching benchmark.
//
// "regressions_first_commits": {
// "some_benchmark": "352cdf", // Consider regressions only after this commit
// "another_benchmark": null, // Skip regression detection altogether
// },
// The thresholds for relative change in results, after which `asv
// publish` starts reporting regressions. Dictionary of the same
// form as in ``regressions_first_commits``, with values
// indicating the thresholds. If multiple entries match, the
// maximum is taken. If no entry matches, the default is 5%.
//
// "regressions_thresholds": {
// "some_benchmark": 0.01, // Threshold of 1%
// "another_benchmark": 0.5, // Threshold of 50%
// },
}
================================================
FILE: asv_benchmarks/benchmarks/__init__.py
================================================
"""Benchmark suite for scikit-learn using ASV"""
================================================
FILE: asv_benchmarks/benchmarks/cluster.py
================================================
from sklearn.cluster import KMeans, MiniBatchKMeans
from .common import Benchmark, Estimator, Predictor, Transformer
from .datasets import _blobs_dataset, _20newsgroups_highdim_dataset
from .utils import neg_mean_inertia
class KMeansBenchmark(Predictor, Transformer, Estimator, Benchmark):
    """Benchmarks for KMeans on dense blobs and sparse 20newsgroups data."""

    param_names = ["representation", "algorithm", "init"]
    params = (["dense", "sparse"], ["full", "elkan"], ["random", "k-means++"])

    def setup_cache(self):
        # Delegates to Estimator.setup_cache, which fits and pickles one
        # estimator per parameter combination.
        super().setup_cache()

    def make_data(self, params):
        representation, _, _ = params
        if representation == "sparse":
            return _20newsgroups_highdim_dataset(n_samples=8000)
        return _blobs_dataset(n_clusters=20)

    def make_estimator(self, params):
        _, algorithm, init = params
        representation = params[0]
        # The iteration budget differs between the sparse and dense setups.
        iteration_budget = 30 if representation == "sparse" else 100
        return KMeans(
            n_clusters=20,
            algorithm=algorithm,
            init=init,
            n_init=1,
            max_iter=iteration_budget,
            tol=-1,
            random_state=0,
        )

    def make_scorers(self):
        # Both scorers ignore their (y_true, y_pred) arguments and score the
        # fitted estimator's inertia on the relevant split instead.
        def inertia_of(data):
            return neg_mean_inertia(
                data, self.estimator.predict(data), self.estimator.cluster_centers_
            )

        self.train_scorer = lambda _, __: inertia_of(self.X)
        self.test_scorer = lambda _, __: inertia_of(self.X_val)
class MiniBatchKMeansBenchmark(Predictor, Transformer, Estimator, Benchmark):
    """Benchmarks for MiniBatchKMeans on dense blobs and sparse text data."""

    param_names = ["representation", "init"]
    params = (["dense", "sparse"], ["random", "k-means++"])

    def setup_cache(self):
        super().setup_cache()

    def make_data(self, params):
        representation, _ = params
        if representation == "sparse":
            return _20newsgroups_highdim_dataset()
        return _blobs_dataset(n_clusters=20)

    def make_estimator(self, params):
        representation, init = params
        # The iteration budget differs between the sparse and dense setups.
        iteration_budget = 5 if representation == "sparse" else 2
        return MiniBatchKMeans(
            n_clusters=20,
            init=init,
            n_init=1,
            max_iter=iteration_budget,
            batch_size=1000,
            max_no_improvement=None,
            compute_labels=False,
            random_state=0,
        )

    def make_scorers(self):
        # Scorers ignore (y_true, y_pred) and score inertia on the split.
        def inertia_of(data):
            return neg_mean_inertia(
                data, self.estimator.predict(data), self.estimator.cluster_centers_
            )

        self.train_scorer = lambda _, __: inertia_of(self.X)
        self.test_scorer = lambda _, __: inertia_of(self.X_val)
================================================
FILE: asv_benchmarks/benchmarks/common.py
================================================
import os
import json
import timeit
import pickle
import itertools
from abc import ABC, abstractmethod
from pathlib import Path
from multiprocessing import cpu_count
import numpy as np
def get_from_config():
    """Get benchmarks configuration from the config.json file

    Returns a tuple ``(profile, n_jobs_vals, save_estimators, save_dir,
    base_commit, bench_predict, bench_transform)``. Values from config.json
    can be overridden through SKLBENCH_* environment variables. As a side
    effect, creates the cache directory layout next to this module.
    """
    current_path = Path(__file__).resolve().parent
    config_path = current_path / "config.json"
    with open(config_path, "r") as config_file:
        # config.json contains "//" comment lines; strip them so the rest is
        # valid JSON. (Note: this drops ANY line containing "//", e.g. one
        # with a URL in it.)
        config_file = "".join(line for line in config_file if line and "//" not in line)
        config = json.loads(config_file)
    profile = os.getenv("SKLBENCH_PROFILE", config["profile"])
    n_jobs_vals_env = os.getenv("SKLBENCH_NJOBS")
    if n_jobs_vals_env:
        # SECURITY NOTE(review): eval() of an environment variable executes
        # arbitrary code. Tolerable only because this is a local benchmark
        # harness; consider ast.literal_eval instead.
        n_jobs_vals = eval(n_jobs_vals_env)
    else:
        n_jobs_vals = config["n_jobs_vals"]
    if not n_jobs_vals:
        # An empty list in the config means "1 through cpu_count()".
        n_jobs_vals = list(range(1, 1 + cpu_count()))
    # Create the on-disk cache tree used by the benchmarks.
    cache_path = current_path / "cache"
    cache_path.mkdir(exist_ok=True)
    (cache_path / "estimators").mkdir(exist_ok=True)
    (cache_path / "tmp").mkdir(exist_ok=True)
    # NOTE(review): os.getenv returns a *string* when the variable is set, so
    # SKLBENCH_SAVE_ESTIMATORS="false" is truthy here — verify intent.
    save_estimators = os.getenv("SKLBENCH_SAVE_ESTIMATORS", config["save_estimators"])
    # First 8 characters of the commit under benchmark ("new" outside asv).
    save_dir = os.getenv("ASV_COMMIT", "new")[:8]
    if save_estimators:
        (cache_path / "estimators" / save_dir).mkdir(exist_ok=True)
    base_commit = os.getenv("SKLBENCH_BASE_COMMIT", config["base_commit"])
    # NOTE(review): the same string-truthiness caveat applies to these two.
    bench_predict = os.getenv("SKLBENCH_PREDICT", config["bench_predict"])
    bench_transform = os.getenv("SKLBENCH_TRANSFORM", config["bench_transform"])
    return (
        profile,
        n_jobs_vals,
        save_estimators,
        save_dir,
        base_commit,
        bench_predict,
        bench_transform,
    )
def get_estimator_path(benchmark, directory, params, save=False):
    """Return the path of the pickled fitted estimator for ``params``.

    With ``save=True`` the path lives under ``cache/estimators/<directory>``;
    otherwise it lives in the transient ``cache/tmp`` directory.
    """
    cache_dir = Path(__file__).resolve().parent / "cache"
    if save:
        target_dir = cache_dir / "estimators" / directory
    else:
        target_dir = cache_dir / "tmp"
    param_tag = "_".join(str(p) for p in params)
    filename = f"{benchmark.__class__.__name__}_estimator_{param_tag}.pkl"
    return target_dir / filename
def clear_tmp():
    """Remove every file from the benchmark's cache/tmp directory."""
    tmp_dir = Path(__file__).resolve().parent / "cache" / "tmp"
    for entry in tmp_dir.iterdir():
        entry.unlink()
class Benchmark(ABC):
    """Abstract base class for all the benchmarks"""

    # asv timing attributes: wall-clock timer, single process, and up to
    # 500 seconds allowed per benchmark.
    timer = timeit.default_timer  # wall time
    processes = 1
    timeout = 500

    # Shared configuration, loaded once at import time from config.json
    # and/or SKLBENCH_* environment variables (see get_from_config).
    (
        profile,
        n_jobs_vals,
        save_estimators,
        save_dir,
        base_commit,
        bench_predict,
        bench_transform,
    ) = get_from_config()

    # Map the selected profile to asv timing attributes and dataset size.
    if profile == "fast":
        warmup_time = 0
        repeat = 1
        number = 1
        min_run_count = 1
        data_size = "small"
    elif profile == "regular":
        warmup_time = 1
        # NOTE(review): asv accepts a (min_repeat, max_repeat, max_time)
        # tuple for `repeat` — confirm against the pinned asv version's docs.
        repeat = (3, 100, 30)
        data_size = "small"
    elif profile == "large_scale":
        warmup_time = 1
        repeat = 3
        number = 1
        data_size = "large"

    @property
    @abstractmethod
    def params(self):
        pass
class Estimator(ABC):
    """Abstract base class for all benchmarks of estimators"""

    @abstractmethod
    def make_data(self, params):
        """Return the dataset for a combination of parameters"""
        # The datasets are cached using joblib.Memory so it's fast and can be
        # called for each repeat
        pass

    @abstractmethod
    def make_estimator(self, params):
        """Return an instance of the estimator for a combination of parameters"""
        pass

    def skip(self, params):
        """Return True if the benchmark should be skipped for these params"""
        return False

    def setup_cache(self):
        """Pickle a fitted estimator for all combinations of parameters"""
        # This is run once per benchmark class.
        clear_tmp()
        param_grid = list(itertools.product(*self.params))
        for params in param_grid:
            if self.skip(params):
                continue
            estimator = self.make_estimator(params)
            # make_data returns (X, X_val, y, y_val); fit on the train split.
            X, _, y, _ = self.make_data(params)
            estimator.fit(X, y)
            # Pickle the fitted estimator so setup() can reload it cheaply
            # for every repeat.
            est_path = get_estimator_path(
                self, Benchmark.save_dir, params, Benchmark.save_estimators
            )
            with est_path.open(mode="wb") as f:
                pickle.dump(estimator, f)

    def setup(self, *params):
        """Generate dataset and load the fitted estimator"""
        # This is run once per combination of parameters and per repeat so we
        # need to avoid doing expensive operations there.
        if self.skip(params):
            # asv treats NotImplementedError in setup as "skip this benchmark".
            raise NotImplementedError
        self.X, self.X_val, self.y, self.y_val = self.make_data(params)
        est_path = get_estimator_path(
            self, Benchmark.save_dir, params, Benchmark.save_estimators
        )
        with est_path.open(mode="rb") as f:
            self.estimator = pickle.load(f)
        self.make_scorers()

    def time_fit(self, *args):
        # Timed benchmark: one fit on the training split.
        self.estimator.fit(self.X, self.y)

    def peakmem_fit(self, *args):
        # Peak-memory benchmark for the same fit.
        self.estimator.fit(self.X, self.y)

    def track_train_score(self, *args):
        if hasattr(self.estimator, "predict"):
            y_pred = self.estimator.predict(self.X)
        else:
            # Estimators without predict (e.g. pure transformers): the
            # scorer installed by make_scorers must cope with y_pred=None.
            y_pred = None
        return float(self.train_scorer(self.y, y_pred))

    def track_test_score(self, *args):
        if hasattr(self.estimator, "predict"):
            y_val_pred = self.estimator.predict(self.X_val)
        else:
            y_val_pred = None
        return float(self.test_scorer(self.y_val, y_val_pred))
class Predictor(ABC):
    """Abstract base class for benchmarks of estimators implementing predict"""

    # Benchmark methods are defined conditionally at class-creation time,
    # based on the shared configuration loaded by Benchmark.
    if Benchmark.bench_predict:

        def time_predict(self, *args):
            self.estimator.predict(self.X)

        def peakmem_predict(self, *args):
            self.estimator.predict(self.X)

        if Benchmark.base_commit is not None:

            def track_same_prediction(self, *args):
                # Compare predictions on the validation split against the
                # estimator pickled at the configured base commit.
                est_path = get_estimator_path(self, Benchmark.base_commit, args, True)
                with est_path.open(mode="rb") as f:
                    estimator_base = pickle.load(f)
                y_val_pred_base = estimator_base.predict(self.X_val)
                y_val_pred = self.estimator.predict(self.X_val)
                return np.allclose(y_val_pred_base, y_val_pred)

    @property
    @abstractmethod
    def params(self):
        pass
class Transformer(ABC):
    """Abstract base class for benchmarks of estimators implementing transform"""

    # Benchmark methods are defined conditionally at class-creation time,
    # based on the shared configuration loaded by Benchmark.
    if Benchmark.bench_transform:

        def time_transform(self, *args):
            self.estimator.transform(self.X)

        def peakmem_transform(self, *args):
            self.estimator.transform(self.X)

        if Benchmark.base_commit is not None:

            def track_same_transform(self, *args):
                # Compare transformed output on the validation split against
                # the estimator pickled at the configured base commit.
                est_path = get_estimator_path(self, Benchmark.base_commit, args, True)
                with est_path.open(mode="rb") as f:
                    estimator_base = pickle.load(f)
                X_val_t_base = estimator_base.transform(self.X_val)
                X_val_t = self.estimator.transform(self.X_val)
                return np.allclose(X_val_t_base, X_val_t)

    @property
    @abstractmethod
    def params(self):
        pass
================================================
FILE: asv_benchmarks/benchmarks/config.json
================================================
{
// "regular": Bencharks are run on small to medium datasets. Each benchmark
// is run multiple times and averaged.
// "fast": Benchmarks are run on small to medium datasets. Each benchmark
// is run only once. May provide unstable benchmarks.
// "large_scale": Benchmarks are run on large datasets. Each benchmark is
// run multiple times and averaged. This profile is meant to
// benchmark scalability and will take hours on single core.
// Can be overridden by environment variable SKLBENCH_PROFILE.
"profile": "regular",
// List of values of n_jobs to use for estimators which accept this
// parameter (-1 means all cores). An empty list means all values from 1 to
// the maximum number of available cores.
// Can be overridden by environment variable SKLBENCH_NJOBS.
"n_jobs_vals": [1],
// If true, fitted estimators are saved in ./cache/estimators/<commit hash>
// Can be overridden by environment variable SKLBENCH_SAVE_ESTIMATORS.
"save_estimators": false,
// Commit hash to compare estimator predictions with.
// If null, predictions are not compared.
// Can be overridden by environment variable SKLBENCH_BASE_COMMIT.
"base_commit": null,
// If false, the predict (resp. transform) method of the estimators won't
// be benchmarked.
// Can be overridden by environment variables SKLBENCH_PREDICT and
// SKLBENCH_TRANSFORM.
"bench_predict": true,
"bench_transform": true
}
================================================
FILE: asv_benchmarks/benchmarks/datasets.py
================================================
import numpy as np
import scipy.sparse as sp
from joblib import Memory
from pathlib import Path
from sklearn.decomposition import TruncatedSVD
from sklearn.datasets import (
make_blobs,
fetch_20newsgroups,
fetch_openml,
load_digits,
make_regression,
make_classification,
fetch_olivetti_faces,
)
from sklearn.preprocessing import MaxAbsScaler, StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
# joblib.Memory cache next to this module: the dataset builders below are
# wrapped with @M.cache so fetched/generated data is reused across calls.
M = Memory(location=str(Path(__file__).resolve().parent / "cache"))
@M.cache
def _blobs_dataset(n_samples=500000, n_features=3, n_clusters=100, dtype=np.float32):
    """Isotropic Gaussian blobs; returns (X, X_val, None, None)."""
    data, _ = make_blobs(
        n_samples=n_samples, n_features=n_features, centers=n_clusters, random_state=0
    )
    data = data.astype(dtype, copy=False)
    train, val = train_test_split(data, test_size=0.1, random_state=0)
    return train, val, None, None
@M.cache
def _20newsgroups_highdim_dataset(n_samples=None, ngrams=(1, 1), dtype=np.float32):
    """Sparse tf-idf 20newsgroups; returns (X, X_val, y, y_val)."""
    bunch = fetch_20newsgroups(random_state=0)
    vectorizer = TfidfVectorizer(ngram_range=ngrams, dtype=dtype)
    X = vectorizer.fit_transform(bunch.data[:n_samples])
    y = bunch.target[:n_samples]
    return train_test_split(X, y, test_size=0.1, random_state=0)
@M.cache
def _20newsgroups_lowdim_dataset(n_components=100, ngrams=(1, 1), dtype=np.float32):
    """Dense low-dimensional 20newsgroups: tf-idf followed by TruncatedSVD."""
    bunch = fetch_20newsgroups()
    X = TfidfVectorizer(ngram_range=ngrams).fit_transform(bunch.data)
    X = X.astype(dtype, copy=False)
    X = TruncatedSVD(n_components=n_components).fit_transform(X)
    return train_test_split(X, bunch.target, test_size=0.1, random_state=0)
@M.cache
def _mnist_dataset(dtype=np.float32):
    """MNIST (from OpenML), MaxAbs-scaled; returns (X, X_val, y, y_val)."""
    X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)
    X = MaxAbsScaler().fit_transform(X.astype(dtype, copy=False))
    return train_test_split(X, y, test_size=0.1, random_state=0)
@M.cache
def _digits_dataset(n_samples=None, dtype=np.float32):
    """Digits dataset, MaxAbs-scaled, optionally truncated to n_samples."""
    X, y = load_digits(return_X_y=True)
    X = MaxAbsScaler().fit_transform(X.astype(dtype, copy=False))
    # Truncate before splitting so n_samples bounds the whole dataset.
    return train_test_split(X[:n_samples], y[:n_samples], test_size=0.1, random_state=0)
@M.cache
def _synth_regression_dataset(n_samples=100000, n_features=100, dtype=np.float32):
    """Standardized synthetic regression data; returns (X, X_val, y, y_val)."""
    X, y = make_regression(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=n_features // 10,
        noise=50,
        random_state=0,
    )
    X = StandardScaler().fit_transform(X.astype(dtype, copy=False))
    return train_test_split(X, y, test_size=0.1, random_state=0)
@M.cache
def _synth_regression_sparse_dataset(
    n_samples=10000, n_features=10000, density=0.01, dtype=np.float32
):
    """Sparse random design matrix with a sparse linear target plus noise."""
    X = sp.random(
        m=n_samples, n=n_features, density=density, format="csr", random_state=0
    )
    # Each RandomState(0) is intentionally fresh so X and the coefficients
    # draw from the start of the same sequence.
    X.data = np.random.RandomState(0).randn(X.getnnz())
    X = X.astype(dtype, copy=False)
    true_coefs = sp.random(m=n_features, n=1, density=0.5, random_state=0)
    true_coefs.data = np.random.RandomState(0).randn(true_coefs.getnnz())
    y = X.dot(true_coefs.toarray()).reshape(-1)
    # NOTE: this noise uses the *global* numpy RNG, so it is not seeded.
    y += 0.2 * y.std() * np.random.randn(n_samples)
    return train_test_split(X, y, test_size=0.1, random_state=0)
@M.cache
def _synth_classification_dataset(
    n_samples=1000, n_features=10000, n_classes=2, dtype=np.float32
):
    """Standardized synthetic classification data (all features informative)."""
    X, y = make_classification(
        n_samples=n_samples,
        n_features=n_features,
        n_classes=n_classes,
        random_state=0,
        n_informative=n_features,
        n_redundant=0,
    )
    X = StandardScaler().fit_transform(X.astype(dtype, copy=False))
    return train_test_split(X, y, test_size=0.1, random_state=0)
@M.cache
def _olivetti_faces_dataset():
    """Olivetti faces, globally then locally centered; no labels returned."""
    faces = fetch_olivetti_faces(shuffle=True, random_state=42).data
    n_samples = faces.shape[0]
    # Global centering: subtract the per-feature mean...
    centered = faces - faces.mean(axis=0)
    # ...then local centering: subtract each sample's own mean.
    centered -= centered.mean(axis=1).reshape(n_samples, -1)
    train, val = train_test_split(centered, test_size=0.1, random_state=0)
    return train, val, None, None
@M.cache
def _random_dataset(
    n_samples=1000, n_features=1000, representation="dense", dtype=np.float32
):
    """Uniform random data as a dense ndarray or a 5%-density CSR matrix."""
    if representation == "dense":
        X = np.random.RandomState(0).random_sample((n_samples, n_features))
        X = X.astype(dtype, copy=False)
    else:
        X = sp.random(
            n_samples,
            n_features,
            density=0.05,
            format="csr",
            dtype=dtype,
            random_state=0,
        )
    train, val = train_test_split(X, test_size=0.1, random_state=0)
    return train, val, None, None
================================================
FILE: asv_benchmarks/benchmarks/decomposition.py
================================================
from sklearn.decomposition import PCA, DictionaryLearning, MiniBatchDictionaryLearning
from .common import Benchmark, Estimator, Transformer
from .datasets import _olivetti_faces_dataset, _mnist_dataset
from .utils import make_pca_scorers, make_dict_learning_scorers
class PCABenchmark(Transformer, Estimator, Benchmark):
    """Benchmarks for PCA across SVD solvers on MNIST."""

    param_names = ["svd_solver"]
    params = (["full", "arpack", "randomized"],)

    def setup_cache(self):
        super().setup_cache()

    def make_data(self, params):
        return _mnist_dataset()

    def make_estimator(self, params):
        (solver,) = params
        return PCA(n_components=32, svd_solver=solver, random_state=0)

    def make_scorers(self):
        make_pca_scorers(self)
class DictionaryLearningBenchmark(Transformer, Estimator, Benchmark):
    """Benchmarks for DictionaryLearning on the Olivetti faces dataset."""

    param_names = ["fit_algorithm", "n_jobs"]
    params = (["lars", "cd"], Benchmark.n_jobs_vals)

    def setup_cache(self):
        super().setup_cache()

    def make_data(self, params):
        return _olivetti_faces_dataset()

    def make_estimator(self, params):
        algorithm, n_jobs = params
        return DictionaryLearning(
            n_components=15,
            fit_algorithm=algorithm,
            alpha=0.1,
            max_iter=20,
            tol=1e-16,
            random_state=0,
            n_jobs=n_jobs,
        )

    def make_scorers(self):
        make_dict_learning_scorers(self)
class MiniBatchDictionaryLearningBenchmark(Transformer, Estimator, Benchmark):
    """Benchmarks for MiniBatchDictionaryLearning on the Olivetti faces."""

    param_names = ["fit_algorithm", "n_jobs"]
    params = (["lars", "cd"], Benchmark.n_jobs_vals)

    def setup_cache(self):
        super().setup_cache()

    def make_data(self, params):
        return _olivetti_faces_dataset()

    def make_estimator(self, params):
        algorithm, n_jobs = params
        return MiniBatchDictionaryLearning(
            n_components=15,
            fit_algorithm=algorithm,
            alpha=0.1,
            batch_size=3,
            random_state=0,
            n_jobs=n_jobs,
        )

    def make_scorers(self):
        make_dict_learning_scorers(self)
================================================
FILE: asv_benchmarks/benchmarks/ensemble.py
================================================
from sklearn.ensemble import (
RandomForestClassifier,
GradientBoostingClassifier,
HistGradientBoostingClassifier,
)
from .common import Benchmark, Estimator, Predictor
from .datasets import (
_20newsgroups_highdim_dataset,
_20newsgroups_lowdim_dataset,
_synth_classification_dataset,
)
from .utils import make_gen_classif_scorers
class RandomForestClassifierBenchmark(Predictor, Estimator, Benchmark):
    """Benchmarks for RandomForestClassifier on 20newsgroups data."""

    param_names = ["representation", "n_jobs"]
    params = (["dense", "sparse"], Benchmark.n_jobs_vals)

    def setup_cache(self):
        super().setup_cache()

    def make_data(self, params):
        representation, _ = params
        if representation == "sparse":
            return _20newsgroups_highdim_dataset()
        return _20newsgroups_lowdim_dataset()

    def make_estimator(self, params):
        _, n_jobs = params
        # A larger forest is only used for the large-scale profile.
        n_estimators = 500 if Benchmark.data_size == "large" else 100
        return RandomForestClassifier(
            n_estimators=n_estimators,
            min_samples_split=10,
            max_features="log2",
            n_jobs=n_jobs,
            random_state=0,
        )

    def make_scorers(self):
        make_gen_classif_scorers(self)
class GradientBoostingClassifierBenchmark(Predictor, Estimator, Benchmark):
    """Benchmarks for GradientBoostingClassifier on 20newsgroups data."""

    param_names = ["representation"]
    params = (["dense", "sparse"],)

    def setup_cache(self):
        super().setup_cache()

    def make_data(self, params):
        (representation,) = params
        if representation == "sparse":
            return _20newsgroups_highdim_dataset()
        return _20newsgroups_lowdim_dataset()

    def make_estimator(self, params):
        # The estimator configuration does not depend on the representation.
        n_estimators = 100 if Benchmark.data_size == "large" else 10
        return GradientBoostingClassifier(
            n_estimators=n_estimators,
            max_features="log2",
            subsample=0.5,
            random_state=0,
        )

    def make_scorers(self):
        make_gen_classif_scorers(self)
class HistGradientBoostingClassifierBenchmark(Predictor, Estimator, Benchmark):
    """Benchmarks for HistGradientBoostingClassifier (no parameter grid)."""

    param_names = []
    params = ()

    def setup_cache(self):
        super().setup_cache()

    def make_data(self, params):
        return _synth_classification_dataset(
            n_samples=10000, n_features=100, n_classes=5
        )

    def make_estimator(self, params):
        return HistGradientBoostingClassifier(
            max_iter=100, max_leaf_nodes=15, early_stopping=False, random_state=0
        )

    def make_scorers(self):
        make_gen_classif_scorers(self)
================================================
FILE: asv_benchmarks/benchmarks/linear_model.py
================================================
from sklearn.linear_model import (
LogisticRegression,
Ridge,
ElasticNet,
Lasso,
LinearRegression,
SGDRegressor,
)
from .common import Benchmark, Estimator, Predictor
from .datasets import (
_20newsgroups_highdim_dataset,
_20newsgroups_lowdim_dataset,
_synth_regression_dataset,
_synth_regression_sparse_dataset,
)
from .utils import make_gen_classif_scorers, make_gen_reg_scorers
class LogisticRegressionBenchmark(Predictor, Estimator, Benchmark):
    """Benchmarks for LogisticRegression on 20newsgroups data."""

    param_names = ["representation", "solver", "n_jobs"]
    params = (["dense", "sparse"], ["lbfgs", "saga"], Benchmark.n_jobs_vals)

    def setup_cache(self):
        super().setup_cache()

    def make_data(self, params):
        representation, _, _ = params
        sparse_input = representation == "sparse"
        if Benchmark.data_size == "large":
            if sparse_input:
                return _20newsgroups_highdim_dataset(n_samples=10000)
            return _20newsgroups_lowdim_dataset(n_components=1e3)
        if sparse_input:
            return _20newsgroups_highdim_dataset(n_samples=2500)
        return _20newsgroups_lowdim_dataset()

    def make_estimator(self, params):
        _, solver, n_jobs = params
        # The penalty is tied to the solver under benchmark.
        penalty = "l2" if solver == "lbfgs" else "l1"
        return LogisticRegression(
            solver=solver,
            penalty=penalty,
            multi_class="multinomial",
            tol=0.01,
            n_jobs=n_jobs,
            random_state=0,
        )

    def make_scorers(self):
        make_gen_classif_scorers(self)
class RidgeBenchmark(Predictor, Estimator, Benchmark):
    """Benchmarks for Ridge across solvers on synthetic regression data."""

    param_names = ["representation", "solver"]
    params = (
        ["dense", "sparse"],
        ["auto", "svd", "cholesky", "lsqr", "sparse_cg", "sag", "saga"],
    )

    def setup_cache(self):
        super().setup_cache()

    def make_data(self, params):
        representation, _ = params
        if representation == "dense":
            return _synth_regression_dataset(n_samples=500000, n_features=100)
        return _synth_regression_sparse_dataset(
            n_samples=100000, n_features=10000, density=0.005
        )

    def make_estimator(self, params):
        _, solver = params
        return Ridge(solver=solver, fit_intercept=False, random_state=0)

    def make_scorers(self):
        make_gen_reg_scorers(self)

    def skip(self, params):
        # The (sparse, svd) combination is not benchmarked.
        representation, solver = params
        return representation == "sparse" and solver == "svd"
class LinearRegressionBenchmark(Predictor, Estimator, Benchmark):
    """Benchmarks for LinearRegression on synthetic regression data."""

    param_names = ["representation"]
    params = (["dense", "sparse"],)

    def setup_cache(self):
        super().setup_cache()

    def make_data(self, params):
        (representation,) = params
        if representation == "dense":
            return _synth_regression_dataset(n_samples=1000000, n_features=100)
        return _synth_regression_sparse_dataset(
            n_samples=10000, n_features=100000, density=0.01
        )

    def make_estimator(self, params):
        return LinearRegression()

    def make_scorers(self):
        make_gen_reg_scorers(self)
class SGDRegressorBenchmark(Predictor, Estimator, Benchmark):
    """Benchmarks for SGDRegressor on synthetic regression data."""

    param_names = ["representation"]
    params = (["dense", "sparse"],)

    def setup_cache(self):
        super().setup_cache()

    def make_data(self, params):
        (representation,) = params
        if representation == "dense":
            return _synth_regression_dataset(n_samples=100000, n_features=200)
        return _synth_regression_sparse_dataset(
            n_samples=100000, n_features=1000, density=0.01
        )

    def make_estimator(self, params):
        return SGDRegressor(max_iter=1000, tol=1e-16, random_state=0)

    def make_scorers(self):
        make_gen_reg_scorers(self)
class ElasticNetBenchmark(Predictor, Estimator, Benchmark):
    """Benchmarks for ElasticNet on synthetic regression data."""

    param_names = ["representation", "precompute"]
    params = (["dense", "sparse"], [True, False])

    def setup_cache(self):
        super().setup_cache()

    def make_data(self, params):
        representation, _ = params
        if representation == "dense":
            return _synth_regression_dataset(n_samples=1000000, n_features=100)
        return _synth_regression_sparse_dataset(
            n_samples=50000, n_features=5000, density=0.01
        )

    def make_estimator(self, params):
        _, precompute = params
        return ElasticNet(precompute=precompute, alpha=0.001, random_state=0)

    def make_scorers(self):
        make_gen_reg_scorers(self)

    def skip(self, params):
        # Only the precompute=True variant is benchmarked on sparse input.
        representation, precompute = params
        return representation == "sparse" and precompute is False
class LassoBenchmark(Predictor, Estimator, Benchmark):
    """Benchmarks for Lasso on synthetic regression data."""

    param_names = ["representation", "precompute"]
    params = (["dense", "sparse"], [True, False])

    def setup_cache(self):
        super().setup_cache()

    def make_data(self, params):
        representation, _ = params
        if representation == "dense":
            return _synth_regression_dataset(n_samples=1000000, n_features=100)
        return _synth_regression_sparse_dataset(
            n_samples=50000, n_features=5000, density=0.01
        )

    def make_estimator(self, params):
        _, precompute = params
        return Lasso(precompute=precompute, alpha=0.001, random_state=0)

    def make_scorers(self):
        make_gen_reg_scorers(self)

    def skip(self, params):
        # Only the precompute=True variant is benchmarked on sparse input.
        representation, precompute = params
        return representation == "sparse" and precompute is False
================================================
FILE: asv_benchmarks/benchmarks/manifold.py
================================================
from sklearn.manifold import TSNE
from .common import Benchmark, Estimator
from .datasets import _digits_dataset
class TSNEBenchmark(Estimator, Benchmark):
    """Benchmarks for t-SNE on the digits dataset."""

    param_names = ["method"]
    params = (["exact", "barnes_hut"],)

    def setup_cache(self):
        super().setup_cache()

    def make_data(self, params):
        (method,) = params
        # The exact method gets a capped sample count; barnes_hut gets all.
        limit = 500 if method == "exact" else None
        return _digits_dataset(n_samples=limit)

    def make_estimator(self, params):
        (method,) = params
        return TSNE(random_state=0, method=method)

    def make_scorers(self):
        # Score with the fitted KL divergence; (y_true, y_pred) are ignored.
        kl_score = lambda _, __: self.estimator.kl_divergence_
        self.train_scorer = kl_score
        self.test_scorer = kl_score
================================================
FILE: asv_benchmarks/benchmarks/metrics.py
================================================
from sklearn.metrics.pairwise import pairwise_distances
from .common import Benchmark
from .datasets import _random_dataset
class PairwiseDistancesBenchmark(Benchmark):
    """Benchmark suite for sklearn.metrics.pairwise.pairwise_distances."""

    param_names = ["representation", "metric", "n_jobs"]
    params = (
        ["dense", "sparse"],
        ["cosine", "euclidean", "manhattan", "correlation"],
        Benchmark.n_jobs_vals,
    )

    def setup(self, *params):
        representation, metric, n_jobs = params

        # Sparse input with the correlation metric is not supported; raising
        # NotImplementedError tells asv to skip this combination.
        if representation == "sparse" and metric == "correlation":
            raise NotImplementedError

        # The slower metrics get fewer samples to keep run time reasonable.
        is_slow_metric = metric in ("manhattan", "correlation")
        if Benchmark.data_size == "large":
            n_samples = 8000 if is_slow_metric else 24000
        else:
            n_samples = 4000 if is_slow_metric else 12000

        data = _random_dataset(n_samples=n_samples, representation=representation)
        self.X, self.X_val, self.y, self.y_val = data

        self.pdist_params = {"metric": metric, "n_jobs": n_jobs}

    def time_pairwise_distances(self, *args):
        pairwise_distances(self.X, **self.pdist_params)

    def peakmem_pairwise_distances(self, *args):
        pairwise_distances(self.X, **self.pdist_params)
================================================
FILE: asv_benchmarks/benchmarks/model_selection.py
================================================
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, cross_val_score
from .common import Benchmark, Estimator, Predictor
from .datasets import _synth_classification_dataset
from .utils import make_gen_classif_scorers
class CrossValidationBenchmark(Benchmark):
    """Benchmark suite for cross_val_score with a random forest."""

    timeout = 20000

    param_names = ["n_jobs"]
    params = (Benchmark.n_jobs_vals,)

    def setup(self, *params):
        (n_jobs,) = params

        self.X, self.X_val, self.y, self.y_val = _synth_classification_dataset(
            n_samples=50000, n_features=100
        )

        self.clf = RandomForestClassifier(n_estimators=50, max_depth=10, random_state=0)

        # The "large" configuration uses more folds.
        n_folds = 16 if Benchmark.data_size == "large" else 4
        self.cv_params = {"n_jobs": n_jobs, "cv": n_folds}

    def time_crossval(self, *args):
        cross_val_score(self.clf, self.X, self.y, **self.cv_params)

    def peakmem_crossval(self, *args):
        cross_val_score(self.clf, self.X, self.y, **self.cv_params)

    def track_crossval(self, *args):
        # Track the mean CV score as a plain float so asv can chart it.
        scores = cross_val_score(self.clf, self.X, self.y, **self.cv_params)
        return float(scores.mean())
class GridSearchBenchmark(Predictor, Estimator, Benchmark):
    """Benchmark suite for GridSearchCV over a random forest."""

    timeout = 20000

    param_names = ["n_jobs"]
    params = (Benchmark.n_jobs_vals,)

    def setup_cache(self):
        super().setup_cache()

    def make_data(self, params):
        return _synth_classification_dataset(n_samples=10000, n_features=100)

    def make_estimator(self, params):
        (n_jobs,) = params

        clf = RandomForestClassifier(random_state=0)

        # The "large" configuration searches a wider hyper-parameter grid.
        if Benchmark.data_size == "large":
            param_grid = {
                "n_estimators": [10, 25, 50, 100, 500],
                "max_depth": [5, 10, None],
                "max_features": [0.1, 0.4, 0.8, 1.0],
            }
        else:
            param_grid = {
                "n_estimators": [10, 25, 50],
                "max_depth": [5, 10],
                "max_features": [0.1, 0.4, 0.8],
            }

        return GridSearchCV(clf, param_grid, n_jobs=n_jobs, cv=4)

    def make_scorers(self):
        make_gen_classif_scorers(self)
================================================
FILE: asv_benchmarks/benchmarks/neighbors.py
================================================
from sklearn.neighbors import KNeighborsClassifier
from .common import Benchmark, Estimator, Predictor
from .datasets import _20newsgroups_lowdim_dataset
from .utils import make_gen_classif_scorers
class KNeighborsClassifierBenchmark(Predictor, Estimator, Benchmark):
    """Benchmark suite for KNeighborsClassifier across tree algorithms."""

    param_names = ["algorithm", "dimension", "n_jobs"]
    params = (["brute", "kd_tree", "ball_tree"], ["low", "high"], Benchmark.n_jobs_vals)

    def setup_cache(self):
        super().setup_cache()

    def make_data(self, params):
        _, dimension, _ = params

        # Project 20newsgroups down to the requested dimensionality; the
        # "large" data-size configuration keeps more components.
        low_dim = dimension == "low"
        if Benchmark.data_size == "large":
            n_components = 40 if low_dim else 200
        else:
            n_components = 10 if low_dim else 50

        return _20newsgroups_lowdim_dataset(n_components=n_components)

    def make_estimator(self, params):
        algorithm, _, n_jobs = params
        return KNeighborsClassifier(algorithm=algorithm, n_jobs=n_jobs)

    def make_scorers(self):
        make_gen_classif_scorers(self)
================================================
FILE: asv_benchmarks/benchmarks/svm.py
================================================
from sklearn.svm import SVC
from .common import Benchmark, Estimator, Predictor
from .datasets import _synth_classification_dataset
from .utils import make_gen_classif_scorers
class SVCBenchmark(Predictor, Estimator, Benchmark):
    """Benchmark suite for SVC over its built-in kernels."""

    param_names = ["kernel"]
    params = (["linear", "poly", "rbf", "sigmoid"],)

    def setup_cache(self):
        super().setup_cache()

    def make_data(self, params):
        return _synth_classification_dataset()

    def make_estimator(self, params):
        (kernel,) = params
        # max_iter=100 together with an extremely small tol keeps the fit
        # duration bounded regardless of convergence.
        return SVC(
            max_iter=100, tol=1e-16, kernel=kernel, random_state=0, gamma="scale"
        )

    def make_scorers(self):
        make_gen_classif_scorers(self)
================================================
FILE: asv_benchmarks/benchmarks/utils.py
================================================
import numpy as np
from sklearn.metrics import balanced_accuracy_score, r2_score
def neg_mean_inertia(X, labels, centers):
    """Negated mean squared distance of each sample to its assigned center.

    Higher (closer to zero) is better, matching the scorer convention.
    """
    residuals = np.asarray(X - centers[labels])
    per_sample_inertia = np.sum(residuals**2, axis=1)
    return -np.mean(per_sample_inertia)
def make_gen_classif_scorers(caller):
    """Attach balanced accuracy as both the train and test scorer on *caller*."""
    caller.test_scorer = balanced_accuracy_score
    caller.train_scorer = balanced_accuracy_score
def make_gen_reg_scorers(caller):
    """Attach the R^2 score as both the train and test scorer on *caller*."""
    caller.train_scorer = r2_score
    caller.test_scorer = r2_score
def neg_mean_data_error(X, U, V):
    """Negated root-mean-squared error of the reconstruction U @ V against X."""
    reconstruction = U.dot(V)
    mse = ((X - reconstruction) ** 2).mean()
    return -np.sqrt(mse)
def make_dict_learning_scorers(caller):
    """Attach reconstruction-error scorers for dictionary-learning estimators.

    Each scorer transforms the data with the fitted estimator and reports
    the negated RMSE of code @ components_ against the raw data.
    """

    def _reconstruction_score(X):
        code = caller.estimator.transform(X)
        return neg_mean_data_error(X, code, caller.estimator.components_)

    caller.train_scorer = lambda _, __: _reconstruction_score(caller.X)
    caller.test_scorer = lambda _, __: _reconstruction_score(caller.X_val)
def explained_variance_ratio(Xt, X):
    """Ratio of total per-feature variance of transformed data Xt to that of X."""
    total_variance = np.var(X, axis=0).sum()
    return np.var(Xt, axis=0).sum() / total_variance
def make_pca_scorers(caller):
    """Attach PCA scorers on *caller*.

    Training uses the fitted explained_variance_ratio_ sum; testing computes
    the empirical explained-variance ratio on the validation set.
    """
    caller.train_scorer = lambda _, __: caller.estimator.explained_variance_ratio_.sum()
    caller.test_scorer = lambda _, __: explained_variance_ratio(
        caller.estimator.transform(caller.X_val), caller.X_val
    )
================================================
FILE: azure-pipelines.yml
================================================
# Adapted from https://github.com/pandas-dev/pandas/blob/master/azure-pipelines.yml
schedules:
- cron: "30 2 * * *"
displayName: Run nightly build
branches:
include:
- main
always: true
jobs:
- job: git_commit
displayName: Get Git Commit
pool:
vmImage: ubuntu-20.04
steps:
- bash: |
set -ex
if [[ $BUILD_REASON == "PullRequest" ]]; then
# By default, pull requests use refs/pull/PULL_ID/merge as the source
# branch, which carries a synthetic "Merge ID into ID" commit message.
# The actual latest commit message is therefore on the second-to-last commit.
COMMIT_ID=$(echo $BUILD_SOURCEVERSIONMESSAGE | awk '{print $2}')
message=$(git log $COMMIT_ID -1 --pretty=%B)
else
message=$BUILD_SOURCEVERSIONMESSAGE
fi
echo "##vso[task.setvariable variable=message;isOutput=true]$message"
name: commit
displayName: Get source version message
- job: linting
dependsOn: [git_commit]
condition: |
and(
succeeded(),
not(contains(dependencies['git_commit']['outputs']['commit.message'], '[lint skip]')),
not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]'))
)
displayName: Linting
pool:
vmImage: ubuntu-20.04
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.9'
- bash: |
# Include pytest compatibility with mypy
pip install pytest flake8 mypy==0.782 black==21.6b0
displayName: Install linters
- bash: |
black --check --diff .
displayName: Run black
- bash: |
./build_tools/circle/linting.sh
displayName: Run linting
- bash: |
mypy sklearn/
displayName: Run mypy
- template: build_tools/azure/posix.yml
parameters:
name: Linux_Nightly
vmImage: ubuntu-20.04
dependsOn: [git_commit, linting]
condition: |
and(
succeeded(),
not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')),
or(eq(variables['Build.Reason'], 'Schedule'),
contains(dependencies['git_commit']['outputs']['commit.message'], '[scipy-dev]'
)
)
)
matrix:
pylatest_pip_scipy_dev:
DISTRIB: 'conda-pip-scipy-dev'
PYTHON_VERSION: '*'
CHECK_WARNINGS: 'true'
CHECK_PYTEST_SOFT_DEPENDENCY: 'true'
TEST_DOCSTRINGS: 'true'
# Tests that require large downloads over the networks are skipped in CI.
# Here we make sure, that they are still run on a regular basis.
SKLEARN_SKIP_NETWORK_TESTS: '0'
CREATE_ISSUE_ON_TRACKER: 'true'
# Check compilation with intel C++ compiler (ICC)
- template: build_tools/azure/posix.yml
parameters:
name: Linux_Nightly_ICC
vmImage: ubuntu-20.04
dependsOn: [git_commit, linting]
condition: |
and(
succeeded(),
not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')),
or(eq(variables['Build.Reason'], 'Schedule'),
contains(dependencies['git_commit']['outputs']['commit.message'], '[icc-build]')
)
)
matrix:
pylatest_conda_forge_mkl:
DISTRIB: 'conda'
CONDA_CHANNEL: 'conda-forge'
PYTHON_VERSION: '*'
BLAS: 'mkl'
COVERAGE: 'false'
BUILD_WITH_ICC: 'true'
- template: build_tools/azure/posix-docker.yml
parameters:
name: Linux_Nightly_PyPy
vmImage: ubuntu-20.04
dependsOn: [linting, git_commit]
condition: |
and(
succeeded(),
not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')),
or(
eq(variables['Build.Reason'], 'Schedule'),
contains(dependencies['git_commit']['outputs']['commit.message'], '[pypy]')
)
)
matrix:
pypy3:
DISTRIB: 'conda-mamba-pypy3'
DOCKER_CONTAINER: 'condaforge/mambaforge-pypy3:4.10.3-5'
PILLOW_VERSION: 'none'
PANDAS_VERSION: 'none'
CREATE_ISSUE_ON_TRACKER: 'true'
# Will run all the time regardless of linting outcome.
- template: build_tools/azure/posix.yml
parameters:
name: Linux_Runs
vmImage: ubuntu-20.04
dependsOn: [git_commit]
condition: |
and(
succeeded(),
not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]'))
)
matrix:
pylatest_conda_forge_mkl:
DISTRIB: 'conda'
CONDA_CHANNEL: 'conda-forge'
PYTHON_VERSION: '*'
BLAS: 'mkl'
COVERAGE: 'true'
SHOW_SHORT_SUMMARY: 'true'
# Check compilation with Ubuntu bionic 18.04 LTS and scipy from conda-forge
- template: build_tools/azure/posix.yml
parameters:
name: Ubuntu_Bionic
vmImage: ubuntu-18.04
dependsOn: [git_commit, linting]
condition: |
and(
succeeded(),
not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')),
ne(variables['Build.Reason'], 'Schedule')
)
matrix:
py37_conda_forge_openblas_ubuntu_1804:
DISTRIB: 'conda'
CONDA_CHANNEL: 'conda-forge'
PYTHON_VERSION: '3.7'
BLAS: 'openblas'
COVERAGE: 'false'
BUILD_WITH_ICC: 'false'
- template: build_tools/azure/posix.yml
parameters:
name: Linux
vmImage: ubuntu-20.04
dependsOn: [linting, git_commit]
condition: |
and(
succeeded(),
not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')),
ne(variables['Build.Reason'], 'Schedule')
)
matrix:
# Linux environment to test that scikit-learn can be built against
# versions of numpy, scipy with ATLAS that comes with Ubuntu Focal 20.04
# i.e. numpy 1.17.4 and scipy 1.3.3
ubuntu_atlas:
DISTRIB: 'ubuntu'
JOBLIB_VERSION: 'min'
PANDAS_VERSION: 'none'
THREADPOOLCTL_VERSION: 'min'
COVERAGE: 'false'
# Linux + Python 3.7 build with OpenBLAS and without SITE_JOBLIB
py37_conda_defaults_openblas:
DISTRIB: 'conda'
CONDA_CHANNEL: 'defaults' # Anaconda main channel
PYTHON_VERSION: '3.7'
BLAS: 'openblas'
NUMPY_VERSION: 'min'
SCIPY_VERSION: 'min'
MATPLOTLIB_VERSION: 'min'
THREADPOOLCTL_VERSION: '2.2.0'
# Linux environment to test the latest available dependencies and MKL.
# It runs tests requiring lightgbm, pandas and PyAMG.
# NOTE(review): PANDAS_VERSION is set to 'none' in this entry, which
# conflicts with the pandas requirement stated above — confirm intent.
pylatest_pip_openblas_pandas:
DISTRIB: 'conda-pip-latest'
PYTHON_VERSION: '3.9'
PANDAS_VERSION: 'none'
CHECK_PYTEST_SOFT_DEPENDENCY: 'true'
TEST_DOCSTRINGS: 'true'
CHECK_WARNINGS: 'true'
- template: build_tools/azure/posix-docker.yml
parameters:
name: Linux_Docker
vmImage: ubuntu-20.04
dependsOn: [linting, git_commit]
condition: |
and(
succeeded(),
not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')),
ne(variables['Build.Reason'], 'Schedule')
)
matrix:
debian_atlas_32bit:
DISTRIB: 'debian-32'
DOCKER_CONTAINER: 'i386/debian:10.9'
JOBLIB_VERSION: 'min'
# disable pytest xdist due to unknown bug with 32-bit container
PYTEST_XDIST_VERSION: 'none'
PYTEST_VERSION: 'min'
THREADPOOLCTL_VERSION: '2.2.0'
- template: build_tools/azure/posix.yml
parameters:
name: macOS
vmImage: macOS-10.14
dependsOn: [linting, git_commit]
condition: |
and(
succeeded(),
not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')),
ne(variables['Build.Reason'], 'Schedule')
)
matrix:
pylatest_conda_forge_mkl:
DISTRIB: 'conda'
BLAS: 'mkl'
CONDA_CHANNEL: 'conda-forge'
pylatest_conda_mkl_no_openmp:
DISTRIB: 'conda'
BLAS: 'mkl'
SKLEARN_TEST_NO_OPENMP: 'true'
SKLEARN_SKIP_OPENMP_TEST: 'true'
- template: build_tools/azure/windows.yml
parameters:
name: Windows
vmImage: windows-latest
dependsOn: [linting, git_commit]
condition: |
and(
succeeded(),
not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')),
ne(variables['Build.Reason'], 'Schedule')
)
matrix:
py37_conda_forge_mkl:
DISTRIB: 'conda'
CONDA_CHANNEL: 'conda-forge'
PYTHON_VERSION: '3.7'
CHECK_WARNINGS: 'true'
PYTHON_ARCH: '64'
PYTEST_VERSION: '*'
COVERAGE: 'true'
py37_pip_openblas_32bit:
PYTHON_VERSION: '3.7'
PYTHON_ARCH: '32'
================================================
FILE: benchmarks/.gitignore
================================================
/bhtsne
*.npy
*.json
/mnist_tsne_output/
================================================
FILE: benchmarks/bench_20newsgroups.py
================================================
from time import time
import argparse
import numpy as np
from sklearn.dummy import DummyClassifier
from sklearn.datasets import fetch_20newsgroups_vectorized
from sklearn.metrics import accuracy_score
from sklearn.utils.validation import check_array
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
# Candidate classifiers, keyed by the names accepted on the command line
# via the -e/--estimators option.
ESTIMATORS = {
    "dummy": DummyClassifier(),
    "random_forest": RandomForestClassifier(max_features="sqrt", min_samples_split=10),
    "extra_trees": ExtraTreesClassifier(max_features="sqrt", min_samples_split=10),
    "logistic_regression": LogisticRegression(),
    "naive_bayes": MultinomialNB(),
    "adaboost": AdaBoostClassifier(n_estimators=10),
}
###############################################################################
# Data
if __name__ == "__main__":
    # Benchmark the selected classifiers on the vectorized 20 newsgroups
    # dataset and report per-estimator train time, test time and accuracy.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-e", "--estimators", nargs="+", required=True, choices=ESTIMATORS
    )
    args = vars(parser.parse_args())

    data_train = fetch_20newsgroups_vectorized(subset="train")
    data_test = fetch_20newsgroups_vectorized(subset="test")
    # NOTE(review): the train matrix is accepted as CSC while the test
    # matrix stays CSR — presumably deliberate for fit/predict access
    # patterns; confirm before changing.
    X_train = check_array(data_train.data, dtype=np.float32, accept_sparse="csc")
    X_test = check_array(data_test.data, dtype=np.float32, accept_sparse="csr")
    y_train = data_train.target
    y_test = data_test.target

    print("20 newsgroups")
    print("=============")
    print(f"X_train.shape = {X_train.shape}")
    print(f"X_train.format = {X_train.format}")
    print(f"X_train.dtype = {X_train.dtype}")
    # np.prod replaces np.product: the latter was a deprecated alias and is
    # removed in NumPy 2.0.
    print(f"X_train density = {X_train.nnz / np.prod(X_train.shape)}")
    print(f"y_train {y_train.shape}")
    print(f"X_test {X_test.shape}")
    print(f"X_test.format = {X_test.format}")
    print(f"X_test.dtype = {X_test.dtype}")
    print(f"y_test {y_test.shape}")
    print()
    print("Classifier Training")
    print("===================")

    accuracy, train_time, test_time = {}, {}, {}
    for name in sorted(args["estimators"]):
        clf = ESTIMATORS[name]
        # Make runs reproducible where the estimator supports random_state;
        # estimators without it raise and are left untouched.
        try:
            clf.set_params(random_state=0)
        except (TypeError, ValueError):
            pass

        print("Training %s ... " % name, end="")
        t0 = time()
        clf.fit(X_train, y_train)
        train_time[name] = time() - t0

        t0 = time()
        y_pred = clf.predict(X_test)
        test_time[name] = time() - t0

        accuracy[name] = accuracy_score(y_test, y_pred)
        print("done")

    print()
    print("Classification performance:")
    print("===========================")
    print()
    print("%s %s %s %s" % ("Classifier ", "train-time", "test-time", "Accuracy"))
    print("-" * 44)
    # Report estimators sorted by ascending accuracy.
    for name in sorted(accuracy, key=accuracy.get):
        print(
            "%s %s %s %s"
            % (
                name.ljust(16),
                ("%.4fs" % train_time[name]).center(10),
                ("%.4fs" % test_time[name]).center(10),
                ("%.4f" % accuracy[name]).center(10),
            )
        )
    print()
================================================
FILE: benchmarks/bench_covertype.py
================================================
"""
===========================
Covertype dataset benchmark
===========================
Benchmark stochastic gradient descent (SGD), Liblinear, and Naive Bayes, CART
(decision tree), RandomForest and Extra-Trees on the forest covertype dataset
of Blackard, Jock, and Dean [1]. The dataset comprises 581,012 samples. It is
low dimensional with 54 features and a sparsity of approx. 23%. Here, we
consider the task of predicting class 1 (spruce/fir). The classification
performance of SGD is competitive with Liblinear while being two orders of
magnitude faster to train::
[..]
Classification performance:
===========================
Classifier train-time test-time error-rate
--------------------------------------------
liblinear 15.9744s 0.0705s 0.2305
GaussianNB 3.0666s 0.3884s 0.4841
SGD 1.0558s 0.1152s 0.2300
CART 79.4296s 0.0523s 0.0469
RandomForest 1190.1620s 0.5881s 0.0243
ExtraTrees 640.3194s 0.6495s 0.0198
The same task has been used in a number of papers including:
* `"SVM Optimization: Inverse Dependence on Training Set Size"
<http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.139.2112>`_
S. Shalev-Shwartz, N. Srebro - In Proceedings of ICML '08.
* `"Pegasos: Primal estimated sub-gradient solver for svm"
<http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.74.8513>`_
S. Shalev-Shwartz, Y. Singer, N. Srebro - In Proceedings of ICML '07.
* `"Training Linear SVMs in Linear Time"
<https://www.cs.cornell.edu/people/tj/publications/joachims_06a.pdf>`_
T. Joachims - In SIGKDD '06
[1] https://archive.ics.uci.edu/ml/datasets/Covertype
"""
# Author: Peter Prettenhofer <peter.prettenhofer@gmail.com>
# Arnaud Joly <arnaud.v.joly@gmail.com>
# License: BSD 3 clause
import os
from time import time
import argparse
import numpy as np
from joblib import Memory
from sklearn.datasets import fetch_covtype, get_data_home
from sklearn.svm import LinearSVC
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import zero_one_loss
from sklearn.utils import check_array
# Memoize the data extraction and memory map the resulting
# train / test splits in readonly mode
# (the cache lives under scikit-learn's data home directory).
memory = Memory(
    os.path.join(get_data_home(), "covertype_benchmark_data"), mmap_mode="r"
)
@memory.cache
def load_data(dtype=np.float32, order="C", random_state=13):
    """Load the data, then cache and memmap the train/test split"""
    print("Loading dataset...")
    data = fetch_covtype(
        download_if_missing=True, shuffle=True, random_state=random_state
    )
    X = check_array(data["data"], dtype=dtype, order=order)
    # Binary task: class 1 (spruce/fir) against all other cover types.
    y = (data["target"] != 1).astype(int)

    # Train-test split (as [Joachims, 2006]).
    print("Creating train-test split...")
    n_train = 522911
    X_train, X_test = X[:n_train], X[n_train:]
    y_train, y_test = y[:n_train], y[n_train:]

    # Standardize only the first 10 (numerical) features; the remaining
    # columns keep mean 0 / std 1 so they pass through unchanged.
    mean = X_train.mean(axis=0)
    std = X_train.std(axis=0)
    mean[10:] = 0.0
    std[10:] = 1.0
    X_train = (X_train - mean) / std
    X_test = (X_test - mean) / std

    return X_train, X_test, y_train, y_test
# Candidate estimators, keyed by the names accepted by --classifiers.
ESTIMATORS = {
    "GBRT": GradientBoostingClassifier(n_estimators=250),
    "ExtraTrees": ExtraTreesClassifier(n_estimators=20),
    "RandomForest": RandomForestClassifier(n_estimators=20),
    "CART": DecisionTreeClassifier(min_samples_split=5),
    "SGD": SGDClassifier(alpha=0.001),
    "GaussianNB": GaussianNB(),
    # loss="l2" was a removed alias for the squared hinge loss; use the
    # supported spelling so LinearSVC does not reject it.
    "liblinear": LinearSVC(
        loss="squared_hinge", penalty="l2", C=1000, dual=False, tol=1e-3
    ),
    "SAG": LogisticRegression(solver="sag", max_iter=2, C=1000),
}
if __name__ == "__main__":
    # Benchmark the selected classifiers on the covertype dataset and report
    # per-estimator train time, test time and zero-one error.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--classifiers",
        nargs="+",
        choices=ESTIMATORS,
        type=str,
        default=["liblinear", "GaussianNB", "SGD", "CART"],
        help="list of classifiers to benchmark.",
    )
    parser.add_argument(
        "--n-jobs",
        nargs="?",
        default=1,
        type=int,
        help=(
            "Number of concurrently running workers for "
            "models that support parallelism."
        ),
    )
    parser.add_argument(
        "--order",
        nargs="?",
        default="C",
        type=str,
        choices=["F", "C"],
        help="Allow to choose between fortran and C ordered data",
    )
    parser.add_argument(
        "--random-seed",
        nargs="?",
        default=13,
        type=int,
        help="Common seed used by random number generator.",
    )
    args = vars(parser.parse_args())

    print(__doc__)

    X_train, X_test, y_train, y_test = load_data(
        order=args["order"], random_state=args["random_seed"]
    )

    print("")
    print("Dataset statistics:")
    print("===================")
    print("%s %d" % ("number of features:".ljust(25), X_train.shape[1]))
    print("%s %d" % ("number of classes:".ljust(25), np.unique(y_train).size))
    print("%s %s" % ("data type:".ljust(25), X_train.dtype))
    print(
        "%s %d (pos=%d, neg=%d, size=%dMB)"
        % (
            "number of train samples:".ljust(25),
            X_train.shape[0],
            np.sum(y_train == 1),
            np.sum(y_train == 0),
            int(X_train.nbytes / 1e6),
        )
    )
    print(
        "%s %d (pos=%d, neg=%d, size=%dMB)"
        % (
            "number of test samples:".ljust(25),
            X_test.shape[0],
            np.sum(y_test == 1),
            np.sum(y_test == 0),
            int(X_test.nbytes / 1e6),
        )
    )

    print()
    print("Training Classifiers")
    print("====================")
    error, train_time, test_time = {}, {}, {}
    for name in sorted(args["classifiers"]):
        print("Training %s ... " % name, end="")
        estimator = ESTIMATORS[name]
        estimator_params = estimator.get_params()

        # Propagate the common seed to every parameter ending in
        # "random_state" (covers nested estimators too).
        estimator.set_params(
            **{
                p: args["random_seed"]
                for p in estimator_params
                if p.endswith("random_state")
            }
        )

        # Forward --n-jobs where the estimator supports parallelism.
        if "n_jobs" in estimator_params:
            estimator.set_params(n_jobs=args["n_jobs"])

        time_start = time()
        estimator.fit(X_train, y_train)
        train_time[name] = time() - time_start

        time_start = time()
        y_pred = estimator.predict(X_test)
        test_time[name] = time() - time_start

        error[name] = zero_one_loss(y_test, y_pred)
        print("done")

    print()
    print("Classification performance:")
    print("===========================")
    print("%s %s %s %s" % ("Classifier ", "train-time", "test-time", "error-rate"))
    print("-" * 44)
    # Report classifiers sorted by ascending error rate.
    for name in sorted(args["classifiers"], key=error.get):
        print(
            "%s %s %s %s"
            % (
                name.ljust(12),
                ("%.4fs" % train_time[name]).center(10),
                ("%.4fs" % test_time[name]).center(10),
                ("%.4f" % error[name]).center(10),
            )
        )
    print()
================================================
FILE: benchmarks/bench_feature_expansions.py
================================================
import matplotlib.pyplot as plt
import numpy as np
import scipy.sparse as sparse
from sklearn.preprocessing import PolynomialFeatures
from time import time
# Benchmark PolynomialFeatures.fit_transform on CSR vs. dense inputs over a
# grid of dimensionalities and densities, then plot the mean timings.
degree = 2
trials = 3
num_rows = 1000
dimensionalities = np.array([1, 2, 8, 16, 32, 64])
densities = np.array([0.01, 0.1, 1.0])
# Accumulated fit_transform times per density, one slot per dimensionality.
csr_times = {d: np.zeros(len(dimensionalities)) for d in densities}
dense_times = {d: np.zeros(len(dimensionalities)) for d in densities}
transform = PolynomialFeatures(
    degree=degree, include_bias=False, interaction_only=False
)

for trial in range(trials):
    for density in densities:
        for dim_index, dim in enumerate(dimensionalities):
            print(trial, density, dim)
            # Same data in both representations for a fair comparison.
            X_csr = sparse.random(num_rows, dim, density).tocsr()
            X_dense = X_csr.toarray()
            # CSR
            t0 = time()
            transform.fit_transform(X_csr)
            csr_times[density][dim_index] += time() - t0
            # Dense
            t0 = time()
            transform.fit_transform(X_dense)
            dense_times[density][dim_index] += time() - t0

csr_linestyle = (0, (3, 1, 1, 1, 1, 1))  # densely dashdotdotted
dense_linestyle = (0, ())  # solid

# One subplot per density; times are averaged over the trials.
fig, axes = plt.subplots(nrows=len(densities), ncols=1, figsize=(8, 10))
for density, ax in zip(densities, axes):
    ax.plot(
        dimensionalities,
        csr_times[density] / trials,
        label="csr",
        linestyle=csr_linestyle,
    )
    ax.plot(
        dimensionalities,
        dense_times[density] / trials,
        label="dense",
        linestyle=dense_linestyle,
    )
    ax.set_title("density %0.2f, degree=%d, n_samples=%d" % (density, degree, num_rows))
    ax.legend()
    ax.set_xlabel("Dimensionality")
    ax.set_ylabel("Time (seconds)")
plt.tight_layout()
plt.show()
================================================
FILE: benchmarks/bench_glm.py
================================================
"""
A comparison of different methods in GLM
Data comes from a random square matrix.
"""
from datetime import datetime
import numpy as np
from sklearn import linear_model
if __name__ == "__main__":
    # Compare fit times of Ridge, OLS and LassoLars on growing random
    # square problems and plot time against problem dimension.
    import matplotlib.pyplot as plt

    n_iter = 40

    time_ridge = np.empty(n_iter)
    time_ols = np.empty(n_iter)
    time_lasso = np.empty(n_iter)

    # x-axis values must match the problem sizes generated in the loop
    # below (10 * i + 3). The previous 500 * arange(1, n_iter + 1) did not
    # correspond to the data actually fitted and mislabeled the plot.
    dimensions = 10 * np.arange(n_iter) + 3

    for i in range(n_iter):
        print("Iteration %s of %s" % (i, n_iter))

        n_samples, n_features = 10 * i + 3, 10 * i + 3
        X = np.random.randn(n_samples, n_features)
        Y = np.random.randn(n_samples)

        start = datetime.now()
        ridge = linear_model.Ridge(alpha=1.0)
        ridge.fit(X, Y)
        time_ridge[i] = (datetime.now() - start).total_seconds()

        start = datetime.now()
        ols = linear_model.LinearRegression()
        ols.fit(X, Y)
        time_ols[i] = (datetime.now() - start).total_seconds()

        start = datetime.now()
        lasso = linear_model.LassoLars()
        lasso.fit(X, Y)
        time_lasso[i] = (datetime.now() - start).total_seconds()

    plt.figure("scikit-learn GLM benchmark results")
    plt.xlabel("Dimensions")
    plt.ylabel("Time (s)")
    plt.plot(dimensions, time_ridge, color="r")
    plt.plot(dimensions, time_ols, color="g")
    plt.plot(dimensions, time_lasso, color="b")
    plt.legend(["Ridge", "OLS", "LassoLars"], loc="upper left")
    plt.axis("tight")
    plt.show()
================================================
FILE: benchmarks/bench_glmnet.py
================================================
"""
To run this, you'll need to have installed.
* glmnet-python
* scikit-learn (of course)
Does two benchmarks
First, we fix a training set and increase the number of
samples. Then we plot the computation time as function of
the number of samples.
In the second benchmark, we increase the number of dimensions of the
training set. Then we plot the computation time as function of
the number of dimensions.
In both cases, only 10% of the features are informative.
"""
import numpy as np
import gc
from time import time
from sklearn.datasets import make_regression
# Regularization strength shared by both implementations.
alpha = 0.1
# alpha = 0.01


def rmse(a, b):
    """Root-mean-squared error between two arrays."""
    diff = a - b
    return np.sqrt((diff**2).mean())
def bench(factory, X, Y, X_test, Y_test, ref_coef):
    """Fit factory(alpha=alpha) on (X, Y) and report diagnostics.

    Prints the fit duration, the test-set RMSE, and the mean absolute
    deviation of the learned coefficients from ref_coef, then returns the
    fit duration in seconds.
    """
    gc.collect()

    # Time only the construction + fit.
    fit_start = time()
    clf = factory(alpha=alpha).fit(X, Y)
    duration = time() - fit_start

    prediction_error = rmse(Y_test, clf.predict(X_test))
    coef_drift = abs(ref_coef - clf.coef_.ravel()).mean()

    print("duration: %0.3fs" % duration)
    print("rmse: %f" % prediction_error)
    print("mean coef abs diff: %f" % coef_drift)
    return duration
if __name__ == "__main__":
    from glmnet.elastic_net import Lasso as GlmnetLasso
    from sklearn.linear_model import Lasso as ScikitLasso

    # Delayed import of matplotlib.pyplot
    import matplotlib.pyplot as plt

    # Benchmark 1: fixed number of features, growing number of samples.
    scikit_results = []
    glmnet_results = []
    n = 20
    step = 500
    n_features = 1000
    # make_regression requires an integer n_informative; Python 3's true
    # division ("/") would produce a float here and break the call.
    n_informative = n_features // 10
    n_test_samples = 1000
    for i in range(1, n + 1):
        print("==================")
        print("Iteration %s of %s" % (i, n))
        print("==================")

        X, Y, coef_ = make_regression(
            n_samples=(i * step) + n_test_samples,
            n_features=n_features,
            noise=0.1,
            n_informative=n_informative,
            coef=True,
        )

        # Hold out the last n_test_samples rows for evaluation.
        X_test = X[-n_test_samples:]
        Y_test = Y[-n_test_samples:]
        X = X[: (i * step)]
        Y = Y[: (i * step)]

        print("benchmarking scikit-learn: ")
        scikit_results.append(bench(ScikitLasso, X, Y, X_test, Y_test, coef_))
        print("benchmarking glmnet: ")
        glmnet_results.append(bench(GlmnetLasso, X, Y, X_test, Y_test, coef_))

    plt.clf()
    xx = range(0, n * step, step)
    plt.title("Lasso regression on sample dataset (%d features)" % n_features)
    plt.plot(xx, scikit_results, "b-", label="scikit-learn")
    plt.plot(xx, glmnet_results, "r-", label="glmnet")
    plt.legend()
    plt.xlabel("number of samples to classify")
    plt.ylabel("Time (s)")
    plt.show()

    # Benchmark 2: fixed number of samples, growing number of features.
    scikit_results = []
    glmnet_results = []
    n = 20
    step = 100
    n_samples = 500

    for i in range(1, n + 1):
        print("==================")
        print("Iteration %02d of %02d" % (i, n))
        print("==================")
        n_features = i * step
        # Same integer requirement as above: use floor division.
        n_informative = n_features // 10

        X, Y, coef_ = make_regression(
            n_samples=(i * step) + n_test_samples,
            n_features=n_features,
            noise=0.1,
            n_informative=n_informative,
            coef=True,
        )

        X_test = X[-n_test_samples:]
        Y_test = Y[-n_test_samples:]
        X = X[:n_samples]
        Y = Y[:n_samples]

        print("benchmarking scikit-learn: ")
        scikit_results.append(bench(ScikitLasso, X, Y, X_test, Y_test, coef_))
        print("benchmarking glmnet: ")
        glmnet_results.append(bench(GlmnetLasso, X, Y, X_test, Y_test, coef_))

    xx = np.arange(100, 100 + n * step, step)
    plt.figure("scikit-learn vs. glmnet benchmark results")
    plt.title("Regression in high dimensional spaces (%d samples)" % n_samples)
    plt.plot(xx, scikit_results, "b-", label="scikit-learn")
    plt.plot(xx, glmnet_results, "r-", label="glmnet")
    plt.legend()
    plt.xlabel("number of features")
    plt.ylabel("Time (s)")
    plt.axis("tight")
    plt.show()
================================================
FILE: benchmarks/bench_hist_gradient_boosting.py
================================================
from time import time
import argparse
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.datasets import make_classification
from sklearn.datasets import make_regression
from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
# Command-line options controlling the benchmark run.
parser = argparse.ArgumentParser()
parser.add_argument("--n-leaf-nodes", type=int, default=31)
parser.add_argument("--n-trees", type=int, default=10)
parser.add_argument(
    "--lightgbm", action="store_true", default=False, help="also plot lightgbm"
)
parser.add_argument(
    "--xgboost", action="store_true", default=False, help="also plot xgboost"
)
parser.add_argument(
    "--catboost", action="store_true", default=False, help="also plot catboost"
)
parser.add_argument("--learning-rate", type=float, default=0.1)
parser.add_argument(
    "--problem",
    type=str,
    default="classification",
    choices=["classification", "regression"],
)
parser.add_argument("--loss", type=str, default="default")
parser.add_argument("--missing-fraction", type=float, default=0)
parser.add_argument("--n-classes", type=int, default=2)
parser.add_argument("--n-samples-max", type=int, default=int(1e6))
parser.add_argument("--n-features", type=int, default=20)
parser.add_argument("--max-bins", type=int, default=255)
parser.add_argument(
    "--random-sample-weights",
    action="store_true",
    default=False,
    help="generate and use random sample weights",
)
args = parser.parse_args()

# Short aliases used throughout the rest of the script.
n_leaf_nodes = args.n_leaf_nodes
n_trees = args.n_trees
lr = args.learning_rate
max_bins = args.max_bins
def get_estimator_and_data():
    """Build the synthetic dataset and pick the estimator class that
    matches the --problem command-line flag.

    Generates 2 * n_samples_max rows so the 50/50 train/test split done
    later can provide up to n_samples_max training samples.
    """
    if args.problem == "regression":
        X, y = make_regression(
            args.n_samples_max * 2, n_features=args.n_features, random_state=0
        )
        return X, y, HistGradientBoostingRegressor
    elif args.problem == "classification":
        X, y = make_classification(
            args.n_samples_max * 2,
            n_features=args.n_features,
            n_classes=args.n_classes,
            n_clusters_per_class=1,
            n_informative=args.n_classes,
            random_state=0,
        )
        return X, y, HistGradientBoostingClassifier
X, y, Estimator = get_estimator_and_data()

# Optionally blank out a random fraction of entries to exercise
# missing-value support.
if args.missing_fraction:
    mask = np.random.binomial(1, args.missing_fraction, size=X.shape).astype(bool)
    X[mask] = np.nan

# Optional random per-sample weights in [0, 10).
if args.random_sample_weights:
    sample_weight = np.random.rand(len(X)) * 10
else:
    sample_weight = None

# 50/50 train/test split; the test-half sample weights are discarded.
if sample_weight is not None:
    (X_train_, X_test_, y_train_, y_test_, sample_weight_train_, _) = train_test_split(
        X, y, sample_weight, test_size=0.5, random_state=0
    )
else:
    X_train_, X_test_, y_train_, y_test_ = train_test_split(
        X, y, test_size=0.5, random_state=0
    )
    sample_weight_train_ = None
def one_run(n_samples):
    """Fit and score models on ``n_samples`` train / ``n_samples`` test rows.

    Always benchmarks scikit-learn; additionally benchmarks LightGBM,
    XGBoost and CatBoost when the corresponding CLI flag was passed.
    Returns a flat 12-tuple of (score, fit_duration, score_duration)
    triplets for the four libraries, with ``None`` entries for libraries
    that were not run.
    """
    X_train = X_train_[:n_samples]
    X_test = X_test_[:n_samples]
    y_train = y_train_[:n_samples]
    y_test = y_test_[:n_samples]
    if sample_weight is not None:
        sample_weight_train = sample_weight_train_[:n_samples]
    else:
        sample_weight_train = None
    assert X_train.shape[0] == n_samples
    assert X_test.shape[0] == n_samples
    print("Data size: %d samples train, %d samples test." % (n_samples, n_samples))
    print("Fitting a sklearn model...")
    tic = time()
    est = Estimator(
        learning_rate=lr,
        max_iter=n_trees,
        max_bins=max_bins,
        max_leaf_nodes=n_leaf_nodes,
        early_stopping=False,
        random_state=0,
        verbose=0,
    )
    # Resolve the 'default' loss to an explicit one for this problem type.
    loss = args.loss
    if args.problem == "classification":
        if loss == "default":
            # loss='auto' does not work with get_equivalent_estimator()
            loss = (
                "binary_crossentropy"
                if args.n_classes == 2
                else "categorical_crossentropy"
            )
    else:
        # regression
        if loss == "default":
            loss = "squared_error"
    est.set_params(loss=loss)
    est.fit(X_train, y_train, sample_weight=sample_weight_train)
    sklearn_fit_duration = time() - tic
    tic = time()
    sklearn_score = est.score(X_test, y_test)
    sklearn_score_duration = time() - tic
    print("score: {:.4f}".format(sklearn_score))
    print("fit duration: {:.3f}s,".format(sklearn_fit_duration))
    print("score duration: {:.3f}s,".format(sklearn_score_duration))
    # Optional LightGBM benchmark with equivalent hyper-parameters.
    lightgbm_score = None
    lightgbm_fit_duration = None
    lightgbm_score_duration = None
    if args.lightgbm:
        print("Fitting a LightGBM model...")
        lightgbm_est = get_equivalent_estimator(
            est, lib="lightgbm", n_classes=args.n_classes
        )
        tic = time()
        lightgbm_est.fit(X_train, y_train, sample_weight=sample_weight_train)
        lightgbm_fit_duration = time() - tic
        tic = time()
        lightgbm_score = lightgbm_est.score(X_test, y_test)
        lightgbm_score_duration = time() - tic
        print("score: {:.4f}".format(lightgbm_score))
        print("fit duration: {:.3f}s,".format(lightgbm_fit_duration))
        print("score duration: {:.3f}s,".format(lightgbm_score_duration))
    # Optional XGBoost benchmark.
    xgb_score = None
    xgb_fit_duration = None
    xgb_score_duration = None
    if args.xgboost:
        print("Fitting an XGBoost model...")
        xgb_est = get_equivalent_estimator(est, lib="xgboost")
        tic = time()
        xgb_est.fit(X_train, y_train, sample_weight=sample_weight_train)
        xgb_fit_duration = time() - tic
        tic = time()
        xgb_score = xgb_est.score(X_test, y_test)
        xgb_score_duration = time() - tic
        print("score: {:.4f}".format(xgb_score))
        print("fit duration: {:.3f}s,".format(xgb_fit_duration))
        print("score duration: {:.3f}s,".format(xgb_score_duration))
    # Optional CatBoost benchmark.
    cat_score = None
    cat_fit_duration = None
    cat_score_duration = None
    if args.catboost:
        print("Fitting a CatBoost model...")
        cat_est = get_equivalent_estimator(est, lib="catboost")
        tic = time()
        cat_est.fit(X_train, y_train, sample_weight=sample_weight_train)
        cat_fit_duration = time() - tic
        tic = time()
        cat_score = cat_est.score(X_test, y_test)
        cat_score_duration = time() - tic
        print("score: {:.4f}".format(cat_score))
        print("fit duration: {:.3f}s,".format(cat_fit_duration))
        print("score duration: {:.3f}s,".format(cat_score_duration))
    return (
        sklearn_score,
        sklearn_fit_duration,
        sklearn_score_duration,
        lightgbm_score,
        lightgbm_fit_duration,
        lightgbm_score_duration,
        xgb_score,
        xgb_fit_duration,
        xgb_score_duration,
        cat_score,
        cat_fit_duration,
        cat_score_duration,
    )
# Benchmark over increasing training-set sizes, capped at --n-samples-max.
n_samples_list = [1000, 10000, 100000, 500000, 1000000, 5000000, 10000000]
n_samples_list = [
    n_samples for n_samples in n_samples_list if n_samples <= args.n_samples_max
]

sklearn_scores = []
sklearn_fit_durations = []
sklearn_score_durations = []
lightgbm_scores = []
lightgbm_fit_durations = []
lightgbm_score_durations = []
xgb_scores = []
xgb_fit_durations = []
xgb_score_durations = []
cat_scores = []
cat_fit_durations = []
cat_score_durations = []

for n_samples in n_samples_list:
    (
        sklearn_score,
        sklearn_fit_duration,
        sklearn_score_duration,
        lightgbm_score,
        lightgbm_fit_duration,
        lightgbm_score_duration,
        xgb_score,
        xgb_fit_duration,
        xgb_score_duration,
        cat_score,
        cat_fit_duration,
        cat_score_duration,
    ) = one_run(n_samples)
    # Collect each result into its corresponding history list.
    for scores, score in (
        (sklearn_scores, sklearn_score),
        (sklearn_fit_durations, sklearn_fit_duration),
        (sklearn_score_durations, sklearn_score_duration),
        (lightgbm_scores, lightgbm_score),
        (lightgbm_fit_durations, lightgbm_fit_duration),
        (lightgbm_score_durations, lightgbm_score_duration),
        (xgb_scores, xgb_score),
        (xgb_fit_durations, xgb_fit_duration),
        (xgb_score_durations, xgb_score_duration),
        (cat_scores, cat_score),
        (cat_fit_durations, cat_fit_duration),
        (cat_score_durations, cat_score_duration),
    ):
        scores.append(score)

# Plot scores, fit durations and score durations versus n_samples.
fig, axs = plt.subplots(3, sharex=True)

axs[0].plot(n_samples_list, sklearn_scores, label="sklearn")
axs[1].plot(n_samples_list, sklearn_fit_durations, label="sklearn")
axs[2].plot(n_samples_list, sklearn_score_durations, label="sklearn")

if args.lightgbm:
    axs[0].plot(n_samples_list, lightgbm_scores, label="lightgbm")
    axs[1].plot(n_samples_list, lightgbm_fit_durations, label="lightgbm")
    axs[2].plot(n_samples_list, lightgbm_score_durations, label="lightgbm")

if args.xgboost:
    axs[0].plot(n_samples_list, xgb_scores, label="XGBoost")
    axs[1].plot(n_samples_list, xgb_fit_durations, label="XGBoost")
    axs[2].plot(n_samples_list, xgb_score_durations, label="XGBoost")

if args.catboost:
    axs[0].plot(n_samples_list, cat_scores, label="CatBoost")
    axs[1].plot(n_samples_list, cat_fit_durations, label="CatBoost")
    axs[2].plot(n_samples_list, cat_score_durations, label="CatBoost")

for ax in axs:
    ax.set_xscale("log")
    ax.legend(loc="best")
    ax.set_xlabel("n_samples")

axs[0].set_title("scores")
axs[1].set_title("fit duration (s)")
axs[2].set_title("score duration (s)")
title = args.problem
if args.problem == "classification":
    title += " n_classes = {}".format(args.n_classes)
fig.suptitle(title)
plt.tight_layout()
plt.show()
================================================
FILE: benchmarks/bench_hist_gradient_boosting_adult.py
================================================
import argparse
from time import time
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_openml
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
# CLI flags: model hyper-parameters plus optional LightGBM comparison.
parser = argparse.ArgumentParser()
parser.add_argument("--n-leaf-nodes", type=int, default=31)
parser.add_argument("--n-trees", type=int, default=100)
parser.add_argument("--lightgbm", action="store_true", default=False)
parser.add_argument("--learning-rate", type=float, default=0.1)
parser.add_argument("--max-bins", type=int, default=255)
parser.add_argument("--no-predict", action="store_true", default=False)
parser.add_argument("--verbose", action="store_true", default=False)
args = parser.parse_args()

# Short aliases for the benchmark hyper-parameters.
n_leaf_nodes = args.n_leaf_nodes
n_trees = args.n_trees
lr = args.learning_rate
max_bins = args.max_bins
verbose = args.verbose
def fit(est, data_train, target_train, libname, **fit_params):
    """Fit ``est`` on the training data and print the elapsed wall-clock time.

    Extra keyword arguments are forwarded unchanged to ``est.fit``.
    """
    print(f"Fitting a {libname} model...")
    start = time()
    est.fit(data_train, target_train, **fit_params)
    elapsed = time() - start
    print(f"fitted in {elapsed:.3f}s")
def predict(est, data_test, target_test):
    """Predict on the test set and print timing, ROC AUC and accuracy.

    Skipped entirely when ``--no-predict`` was passed on the command line.
    """
    if args.no_predict:
        return
    start = time()
    predicted_test = est.predict(data_test)
    predicted_proba_test = est.predict_proba(data_test)
    elapsed = time() - start
    roc_auc = roc_auc_score(target_test, predicted_proba_test[:, 1])
    acc = accuracy_score(target_test, predicted_test)
    print(f"predicted in {elapsed:.3f}s, ROC AUC: {roc_auc:.4f}, ACC: {acc :.4f}")
data = fetch_openml(data_id=179, as_frame=False)  # adult dataset
X, y = data.data, data.target

n_features = X.shape[1]
n_categorical_features = len(data.categories)
n_numerical_features = n_features - n_categorical_features
print(f"Number of features: {n_features}")
print(f"Number of categorical features: {n_categorical_features}")
print(f"Number of numerical features: {n_numerical_features}")

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Note: no need to use an OrdinalEncoder because categorical features are
# already clean
is_categorical = [name in data.categories for name in data.feature_names]
est = HistGradientBoostingClassifier(
    loss="binary_crossentropy",
    learning_rate=lr,
    max_iter=n_trees,
    max_bins=max_bins,
    max_leaf_nodes=n_leaf_nodes,
    categorical_features=is_categorical,
    early_stopping=False,
    random_state=0,
    verbose=verbose,
)

fit(est, X_train, y_train, "sklearn")
predict(est, X_test, y_test)

# Optionally benchmark LightGBM with equivalent hyper-parameters, using
# its native categorical support instead of one-hot encoding.
if args.lightgbm:
    est = get_equivalent_estimator(est, lib="lightgbm")
    est.set_params(max_cat_to_onehot=1)  # dont use OHE
    categorical_features = [
        f_idx for (f_idx, is_cat) in enumerate(is_categorical) if is_cat
    ]
    fit(est, X_train, y_train, "lightgbm", categorical_feature=categorical_features)
    predict(est, X_test, y_test)
================================================
FILE: benchmarks/bench_hist_gradient_boosting_categorical_only.py
================================================
import argparse
from time import time
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.datasets import make_classification
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
# CLI flags: synthetic dataset shape, model hyper-parameters, and an
# optional LightGBM comparison.
parser = argparse.ArgumentParser()
parser.add_argument("--n-leaf-nodes", type=int, default=31)
parser.add_argument("--n-trees", type=int, default=100)
parser.add_argument("--n-features", type=int, default=20)
parser.add_argument("--n-cats", type=int, default=20)
parser.add_argument("--n-samples", type=int, default=10_000)
parser.add_argument("--lightgbm", action="store_true", default=False)
parser.add_argument("--learning-rate", type=float, default=0.1)
parser.add_argument("--max-bins", type=int, default=255)
parser.add_argument("--no-predict", action="store_true", default=False)
parser.add_argument("--verbose", action="store_true", default=False)
args = parser.parse_args()

# Short aliases for the benchmark hyper-parameters.
n_leaf_nodes = args.n_leaf_nodes
n_features = args.n_features
n_categories = args.n_cats
n_samples = args.n_samples
n_trees = args.n_trees
lr = args.learning_rate
max_bins = args.max_bins
verbose = args.verbose
def fit(est, data_train, target_train, libname, **fit_params):
    """Fit ``est`` on the training data, printing the elapsed time.

    Any extra keyword arguments are passed through to ``est.fit``.
    """
    print(f"Fitting a {libname} model...")
    t0 = time()
    est.fit(data_train, target_train, **fit_params)
    duration = time() - t0
    print(f"fitted in {duration:.3f}s")
def predict(est, data_test):
    """Time a single ``predict`` call on the test data.

    We don't report accuracy or ROC because the dataset doesn't really make
    sense: we treat ordered features as un-ordered categories.  Skipped
    entirely when ``--no-predict`` was passed.
    """
    if args.no_predict:
        return
    t0 = time()
    est.predict(data_test)
    duration = time() - t0
    print(f"predicted in {duration:.3f}s")
X, y = make_classification(n_samples=n_samples, n_features=n_features, random_state=0)
# Discretize every feature so it can be treated as a categorical column.
X = KBinsDiscretizer(n_bins=n_categories, encode="ordinal").fit_transform(X)

print(f"Number of features: {n_features}")
print(f"Number of samples: {n_samples}")

is_categorical = [True] * n_features
est = HistGradientBoostingClassifier(
    loss="binary_crossentropy",
    learning_rate=lr,
    max_iter=n_trees,
    max_bins=max_bins,
    max_leaf_nodes=n_leaf_nodes,
    categorical_features=is_categorical,
    early_stopping=False,
    random_state=0,
    verbose=verbose,
)

fit(est, X, y, "sklearn")
predict(est, X)

# Optionally benchmark LightGBM with equivalent hyper-parameters, using
# its native categorical support instead of one-hot encoding.
if args.lightgbm:
    est = get_equivalent_estimator(est, lib="lightgbm")
    est.set_params(max_cat_to_onehot=1)  # dont use OHE
    categorical_features = list(range(n_features))
    fit(est, X, y, "lightgbm", categorical_feature=categorical_features)
    predict(est, X)
================================================
FILE: benchmarks/bench_hist_gradient_boosting_higgsboson.py
================================================
from urllib.request import urlretrieve
import os
from gzip import GzipFile
from time import time
import argparse
import numpy as np
import pandas as pd
from joblib import Memory
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
# CLI flags: model hyper-parameters plus optional third-party comparisons.
parser = argparse.ArgumentParser()
parser.add_argument("--n-leaf-nodes", type=int, default=31)
parser.add_argument("--n-trees", type=int, default=10)
parser.add_argument("--lightgbm", action="store_true", default=False)
parser.add_argument("--xgboost", action="store_true", default=False)
parser.add_argument("--catboost", action="store_true", default=False)
parser.add_argument("--learning-rate", type=float, default=1.0)
parser.add_argument("--subsample", type=int, default=None)
parser.add_argument("--max-bins", type=int, default=255)
parser.add_argument("--no-predict", action="store_true", default=False)
parser.add_argument("--cache-loc", type=str, default="/tmp")
args = parser.parse_args()

HERE = os.path.dirname(__file__)
URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/00280/HIGGS.csv.gz"
# Disk cache so the 2.6 GB download/parse only happens once per machine.
m = Memory(location=args.cache_loc, mmap_mode="r")

# Short aliases for the benchmark hyper-parameters.
n_leaf_nodes = args.n_leaf_nodes
n_trees = args.n_trees
subsample = args.subsample
lr = args.learning_rate
max_bins = args.max_bins
@m.cache
def load_data():
    """Download (if needed), parse, and disk-cache the HIGGS dataset.

    Returns the full dataset as a float32 DataFrame.  The result is cached
    by joblib's ``Memory``, so repeated runs skip both the download and the
    CSV parsing.
    """
    filename = os.path.join(HERE, URL.rsplit("/", 1)[-1])
    if not os.path.exists(filename):
        # Fix: the progress messages previously printed the literal text
        # "(unknown)" instead of interpolating the target file path.
        print(f"Downloading {URL} to {filename} (2.6 GB)...")
        urlretrieve(URL, filename)
        print("done.")
    print(f"Parsing {filename}...")
    tic = time()
    with GzipFile(filename) as f:
        df = pd.read_csv(f, header=None, dtype=np.float32)
    toc = time()
    print(f"Loaded {df.values.nbytes / 1e9:0.3f} GB in {toc - tic:0.3f}s")
    return df
def fit(est, data_train, target_train, libname):
    """Fit ``est`` on the training data and report the elapsed time."""
    print(f"Fitting a {libname} model...")
    t_start = time()
    est.fit(data_train, target_train)
    print(f"fitted in {time() - t_start:.3f}s")
def predict(est, data_test, target_test):
    """Time the test-set predictions and print ROC AUC plus accuracy.

    Does nothing when ``--no-predict`` was passed on the command line.
    """
    if args.no_predict:
        return
    t_start = time()
    predicted_test = est.predict(data_test)
    predicted_proba_test = est.predict_proba(data_test)
    elapsed = time() - t_start
    roc_auc = roc_auc_score(target_test, predicted_proba_test[:, 1])
    acc = accuracy_score(target_test, predicted_test)
    print(f"predicted in {elapsed:.3f}s, ROC AUC: {roc_auc:.4f}, ACC: {acc :.4f}")
df = load_data()
# First column is the binary target, the rest are the features.
target = df.values[:, 0]
data = np.ascontiguousarray(df.values[:, 1:])
data_train, data_test, target_train, target_test = train_test_split(
    data, target, test_size=0.2, random_state=0
)
# Optionally limit the training set size for quicker runs.
if subsample is not None:
    data_train, target_train = data_train[:subsample], target_train[:subsample]

n_samples, n_features = data_train.shape
print(f"Training set with {n_samples} records with {n_features} features.")

est = HistGradientBoostingClassifier(
    loss="binary_crossentropy",
    learning_rate=lr,
    max_iter=n_trees,
    max_bins=max_bins,
    max_leaf_nodes=n_leaf_nodes,
    early_stopping=False,
    random_state=0,
    verbose=1,
)
fit(est, data_train, target_train, "sklearn")
predict(est, data_test, target_test)

# Optionally benchmark equivalent models from third-party libraries.
if args.lightgbm:
    est = get_equivalent_estimator(est, lib="lightgbm")
    fit(est, data_train, target_train, "lightgbm")
    predict(est, data_test, target_test)

if args.xgboost:
    est = get_equivalent_estimator(est, lib="xgboost")
    fit(est, data_train, target_train, "xgboost")
    predict(est, data_test, target_test)

if args.catboost:
    est = get_equivalent_estimator(est, lib="catboost")
    fit(est, data_train, target_train, "catboost")
    predict(est, data_test, target_test)
================================================
FILE: benchmarks/bench_hist_gradient_boosting_threading.py
================================================
from time import time
import argparse
import os
from pprint import pprint
import numpy as np
from threadpoolctl import threadpool_limits
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.datasets import make_classification
from sklearn.datasets import make_regression
from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
# CLI flags: which libraries to benchmark, dataset shape, model
# hyper-parameters, and plotting options.
parser = argparse.ArgumentParser()
parser.add_argument("--n-leaf-nodes", type=int, default=31)
parser.add_argument("--n-trees", type=int, default=10)
parser.add_argument(
    "--lightgbm", action="store_true", default=False, help="also benchmark lightgbm"
)
parser.add_argument(
    "--xgboost", action="store_true", default=False, help="also benchmark xgboost"
)
parser.add_argument(
    "--catboost", action="store_true", default=False, help="also benchmark catboost"
)
parser.add_argument("--learning-rate", type=float, default=0.1)
parser.add_argument(
    "--problem",
    type=str,
    default="classification",
    choices=["classification", "regression"],
)
parser.add_argument("--loss", type=str, default="default")
parser.add_argument("--missing-fraction", type=float, default=0)
parser.add_argument("--n-classes", type=int, default=2)
parser.add_argument("--n-samples", type=int, default=int(1e6))
parser.add_argument("--n-features", type=int, default=100)
parser.add_argument("--max-bins", type=int, default=255)
parser.add_argument("--print-params", action="store_true", default=False)
parser.add_argument(
    "--random-sample-weights",
    action="store_true",
    default=False,
    help="generate and use random sample weights",
)
parser.add_argument(
    "--plot", action="store_true", default=False, help="show a plot results"
)
parser.add_argument(
    "--plot-filename", default=None, help="filename to save the figure to disk"
)
args = parser.parse_args()

# Short aliases for the benchmark hyper-parameters.
n_samples = args.n_samples
n_leaf_nodes = args.n_leaf_nodes
n_trees = args.n_trees
lr = args.learning_rate
max_bins = args.max_bins
print("Data size: %d samples train, %d samples test." % (n_samples, n_samples))
print(f"n_features: {args.n_features}")
def get_estimator_and_data():
    """Generate the synthetic dataset for ``--problem`` and return
    ``(X, y, estimator_class)``.

    ``2 * --n-samples`` rows are generated so the data can be split 50/50
    into train and test sets below.
    """
    if args.problem == "classification":
        X, y = make_classification(
            args.n_samples * 2,
            n_features=args.n_features,
            n_classes=args.n_classes,
            n_clusters_per_class=1,
            n_informative=args.n_features // 2,
            random_state=0,
        )
        return X, y, HistGradientBoostingClassifier
    elif args.problem == "regression":
        X, y = make_regression(
            # Bug fix: this script defines --n-samples (not --n-samples-max),
            # so reading args.n_samples_max raised AttributeError whenever
            # --problem regression was used.
            args.n_samples * 2,
            n_features=args.n_features,
            random_state=0,
        )
        return X, y, HistGradientBoostingRegressor
X, y, Estimator = get_estimator_and_data()
# Optionally replace a random fraction of the entries with missing values.
if args.missing_fraction:
    mask = np.random.binomial(1, args.missing_fraction, size=X.shape).astype(bool)
    X[mask] = np.nan
if args.random_sample_weights:
    sample_weight = np.random.rand(len(X)) * 10
else:
    sample_weight = None
# 50/50 train/test split; the test-set sample weights are discarded (unused).
if sample_weight is not None:
    (X_train_, X_test_, y_train_, y_test_, sample_weight_train_, _) = train_test_split(
        X, y, sample_weight, test_size=0.5, random_state=0
    )
else:
    X_train_, X_test_, y_train_, y_test_ = train_test_split(
        X, y, test_size=0.5, random_state=0
    )
    sample_weight_train_ = None

# Reference scikit-learn estimator; one_run() fits a fresh clone per run.
sklearn_est = Estimator(
    learning_rate=lr,
    max_iter=n_trees,
    max_bins=max_bins,
    max_leaf_nodes=n_leaf_nodes,
    early_stopping=False,
    random_state=0,
    verbose=0,
)

# Resolve the 'default' loss to an explicit one for this problem type.
loss = args.loss
if args.problem == "classification":
    if loss == "default":
        # loss='auto' does not work with get_equivalent_estimator()
        loss = (
            "binary_crossentropy" if args.n_classes == 2 else "categorical_crossentropy"
        )
else:
    # regression
    if loss == "default":
        loss = "squared_error"
sklearn_est.set_params(loss=loss)

# Optionally print the hyper-parameters of every benchmarked library.
if args.print_params:
    print("scikit-learn")
    pprint(sklearn_est.get_params())
    for libname in ["lightgbm", "xgboost", "catboost"]:
        if getattr(args, libname):
            print(libname)
            est = get_equivalent_estimator(
                sklearn_est, lib=libname, n_classes=args.n_classes
            )
            pprint(est.get_params())
def one_run(n_threads, n_samples):
    """Benchmark all enabled libraries with ``n_threads`` threads.

    Fits a clone of ``sklearn_est`` (with OpenMP limited to ``n_threads``)
    plus, when the corresponding CLI flag was passed, equivalent LightGBM /
    XGBoost / CatBoost models on ``n_samples`` train and test rows.
    Returns a flat 12-tuple of (score, fit_duration, score_duration)
    triplets for the four libraries, with ``None`` entries for libraries
    that were not run.
    """
    X_train = X_train_[:n_samples]
    X_test = X_test_[:n_samples]
    y_train = y_train_[:n_samples]
    y_test = y_test_[:n_samples]
    if sample_weight is not None:
        sample_weight_train = sample_weight_train_[:n_samples]
    else:
        sample_weight_train = None
    assert X_train.shape[0] == n_samples
    assert X_test.shape[0] == n_samples
    print("Fitting a sklearn model...")
    tic = time()
    est = sklearn.base.clone(sklearn_est)
    # Limit OpenMP threads during fit to measure threading scalability.
    with threadpool_limits(n_threads, user_api="openmp"):
        est.fit(X_train, y_train, sample_weight=sample_weight_train)
    sklearn_fit_duration = time() - tic
    tic = time()
    sklearn_score = est.score(X_test, y_test)
    sklearn_score_duration = time() - tic
    print("score: {:.4f}".format(sklearn_score))
    print("fit duration: {:.3f}s,".format(sklearn_fit_duration))
    print("score duration: {:.3f}s,".format(sklearn_score_duration))
    # Optional LightGBM benchmark with equivalent hyper-parameters.
    lightgbm_score = None
    lightgbm_fit_duration = None
    lightgbm_score_duration = None
    if args.lightgbm:
        print("Fitting a LightGBM model...")
        lightgbm_est = get_equivalent_estimator(
            est, lib="lightgbm", n_classes=args.n_classes
        )
        lightgbm_est.set_params(num_threads=n_threads)
        tic = time()
        lightgbm_est.fit(X_train, y_train, sample_weight=sample_weight_train)
        lightgbm_fit_duration = time() - tic
        tic = time()
        lightgbm_score = lightgbm_est.score(X_test, y_test)
        lightgbm_score_duration = time() - tic
        print("score: {:.4f}".format(lightgbm_score))
        print("fit duration: {:.3f}s,".format(lightgbm_fit_duration))
        print("score duration: {:.3f}s,".format(lightgbm_score_duration))
    # Optional XGBoost benchmark.
    xgb_score = None
    xgb_fit_duration = None
    xgb_score_duration = None
    if args.xgboost:
        print("Fitting an XGBoost model...")
        xgb_est = get_equivalent_estimator(est, lib="xgboost")
        xgb_est.set_params(nthread=n_threads)
        tic = time()
        xgb_est.fit(X_train, y_train, sample_weight=sample_weight_train)
        xgb_fit_duration = time() - tic
        tic = time()
        xgb_score = xgb_est.score(X_test, y_test)
        xgb_score_duration = time() - tic
        print("score: {:.4f}".format(xgb_score))
        print("fit duration: {:.3f}s,".format(xgb_fit_duration))
        print("score duration: {:.3f}s,".format(xgb_score_duration))
    # Optional CatBoost benchmark.
    cat_score = None
    cat_fit_duration = None
    cat_score_duration = None
    if args.catboost:
        print("Fitting a CatBoost model...")
        cat_est = get_equivalent_estimator(est, lib="catboost")
        cat_est.set_params(thread_count=n_threads)
        tic = time()
        cat_est.fit(X_train, y_train, sample_weight=sample_weight_train)
        cat_fit_duration = time() - tic
        tic = time()
        cat_score = cat_est.score(X_test, y_test)
        cat_score_duration = time() - tic
        print("score: {:.4f}".format(cat_score))
        print("fit duration: {:.3f}s,".format(cat_fit_duration))
        print("score duration: {:.3f}s,".format(cat_score_duration))
    return (
        sklearn_score,
        sklearn_fit_duration,
        sklearn_score_duration,
        lightgbm_score,
        lightgbm_fit_duration,
        lightgbm_score_duration,
        xgb_score,
        xgb_fit_duration,
        xgb_score_duration,
        cat_score,
        cat_fit_duration,
        cat_score_duration,
    )
# Sweep thread counts in powers of two up to the machine's CPU count.
max_threads = os.cpu_count()
n_threads_list = [2 ** i for i in range(8) if (2 ** i) < max_threads]
n_threads_list.append(max_threads)

sklearn_scores = []
sklearn_fit_durations = []
sklearn_score_durations = []
lightgbm_scores = []
lightgbm_fit_durations = []
lightgbm_score_durations = []
xgb_scores = []
xgb_fit_durations = []
xgb_score_durations = []
cat_scores = []
cat_fit_durations = []
cat_score_durations = []

for n_threads in n_threads_list:
    print(f"n_threads: {n_threads}")
    (
        sklearn_score,
        sklearn_fit_duration,
        sklearn_score_duration,
        lightgbm_score,
        lightgbm_fit_duration,
        lightgbm_score_duration,
        xgb_score,
        xgb_fit_duration,
        xgb_score_duration,
        cat_score,
        cat_fit_duration,
        cat_score_duration,
    ) = one_run(n_threads, n_samples)
    # Collect each result into its corresponding history list.
    for scores, score in (
        (sklearn_scores, sklearn_score),
        (sklearn_fit_durations, sklearn_fit_duration),
        (sklearn_score_durations, sklearn_score_duration),
        (lightgbm_scores, lightgbm_score),
        (lightgbm_fit_durations, lightgbm_fit_duration),
        (lightgbm_score_durations, lightgbm_score_duration),
        (xgb_scores, xgb_score),
        (xgb_fit_durations, xgb_fit_duration),
        (xgb_score_durations, xgb_score_duration),
        (cat_scores, cat_score),
        (cat_fit_durations, cat_fit_duration),
        (cat_score_durations, cat_score_duration),
    ):
        scores.append(score)

# Plot fit/score durations versus number of threads, if requested.
if args.plot or args.plot_filename:
    import matplotlib.pyplot as plt
    import matplotlib

    fig, axs = plt.subplots(2, figsize=(12, 12))

    label = f"sklearn {sklearn.__version__}"
    axs[0].plot(n_threads_list, sklearn_fit_durations, label=label)
    axs[1].plot(n_threads_list, sklearn_score_durations, label=label)

    if args.lightgbm:
        import lightgbm

        label = f"LightGBM {lightgbm.__version__}"
        axs[0].plot(n_threads_list, lightgbm_fit_durations, label=label)
        axs[1].plot(n_threads_list, lightgbm_score_durations, label=label)

    if args.xgboost:
        import xgboost

        label = f"XGBoost {xgboost.__version__}"
        axs[0].plot(n_threads_list, xgb_fit_durations, label=label)
        axs[1].plot(n_threads_list, xgb_score_durations, label=label)

    if args.catboost:
        import catboost

        label = f"CatBoost {catboost.__version__}"
        axs[0].plot(n_threads_list, cat_fit_durations, label=label)
        axs[1].plot(n_threads_list, cat_score_durations, label=label)

    for ax in axs:
        ax.set_xscale("log")
        ax.set_xlabel("n_threads")
        ax.set_ylabel("duration (s)")
        ax.set_ylim(0, None)
        ax.set_xticks(n_threads_list)
        ax.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
        ax.legend(loc="best")

    axs[0].set_title("fit duration (s)")
    axs[1].set_title("score duration (s)")

    title = args.problem
    if args.problem == "classification":
        title += " n_classes = {}".format(args.n_classes)
    fig.suptitle(title)

    plt.tight_layout()

    if args.plot_filename:
        plt.savefig(args.plot_filename)
    if args.plot:
        plt.show()
================================================
FILE: benchmarks/bench_isolation_forest.py
================================================
"""
==========================================
IsolationForest benchmark
==========================================
A test of IsolationForest on classical anomaly detection datasets.
The benchmark is run as follows:
1. The dataset is randomly split into a training set and a test set, both
assumed to contain outliers.
2. Isolation Forest is trained on the training set.
3. The ROC curve is computed on the test set using the knowledge of the labels.
Note that the smtp dataset contains a very small proportion of outliers.
Therefore, depending on the seed of the random number generator, randomly
splitting the data set might lead to a test set containing no outliers. In this
case a warning is raised when computing the ROC curve.
"""
from time import time
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest
from sklearn.metrics import roc_curve, auc
from sklearn.datasets import fetch_kddcup99, fetch_covtype, fetch_openml
from sklearn.preprocessing import LabelBinarizer
from sklearn.utils import shuffle as sh
print(__doc__)
def print_outlier_ratio(y):
    """Print the count of each distinct target value and the outlier ratio.

    The outlier ratio is the frequency of the rarest class — a useful
    indicator for the anomaly-detection datasets used in this benchmark.
    """
    values, counts = np.unique(y, return_counts=True)
    print("----- Target count values: ")
    for value, count in zip(values, counts):
        print("------ %s -> %d occurrences" % (str(value), count))
    print("----- Outlier ratio: %.5f" % (np.min(counts) / len(y)))
random_state = 1

fig_roc, ax_roc = plt.subplots(1, 1, figsize=(8, 5))

# Set this to true for plotting score histograms for each dataset:
with_decision_function_histograms = False

# datasets available = ['http', 'smtp', 'SA', 'SF', 'shuttle', 'forestcover']
datasets = ["http", "smtp", "SA", "SF", "shuttle", "forestcover"]

# Loop over all datasets for fitting and scoring the estimator:
for dat in datasets:
    # Loading and vectorizing the data:
    print("====== %s ======" % dat)
    print("--- Fetching data...")
    if dat in ["http", "smtp", "SF", "SA"]:
        dataset = fetch_kddcup99(
            subset=dat, shuffle=True, percent10=True, random_state=random_state
        )
        X = dataset.data
        y = dataset.target

    if dat == "shuttle":
        dataset = fetch_openml("shuttle")
        X = dataset.data
        y = dataset.target
        X, y = sh(X, y, random_state=random_state)
        # we remove data with label 4
        # normal data are then those of class 1
        s = y != 4
        X = X[s, :]
        y = y[s]
        y = (y != 1).astype(int)
        print("----- ")

    if dat == "forestcover":
        dataset = fetch_covtype(shuffle=True, random_state=random_state)
        X = dataset.data
        y = dataset.target
        # normal data are those with attribute 2
        # abnormal those with attribute 4
        s = (y == 2) + (y == 4)
        X = X[s, :]
        y = y[s]
        y = (y != 2).astype(int)
        print_outlier_ratio(y)

    print("--- Vectorizing data...")
    # One-hot encode the string-valued columns of the kddcup99 subsets and
    # binarize the target: outlier == anything but the b'normal.' label.
    if dat == "SF":
        lb = LabelBinarizer()
        x1 = lb.fit_transform(X[:, 1].astype(str))
        X = np.c_[X[:, :1], x1, X[:, 2:]]
        y = (y != b"normal.").astype(int)
        print_outlier_ratio(y)

    if dat == "SA":
        lb = LabelBinarizer()
        x1 = lb.fit_transform(X[:, 1].astype(str))
        x2 = lb.fit_transform(X[:, 2].astype(str))
        x3 = lb.fit_transform(X[:, 3].astype(str))
        X = np.c_[X[:, :1], x1, x2, x3, X[:, 4:]]
        y = (y != b"normal.").astype(int)
        print_outlier_ratio(y)

    if dat in ("http", "smtp"):
        y = (y != b"normal.").astype(int)
        print_outlier_ratio(y)

    # First half of the (already shuffled) data for training, second for test.
    n_samples, n_features = X.shape
    n_samples_train = n_samples // 2

    X = X.astype(float)
    X_train = X[:n_samples_train, :]
    X_test = X[n_samples_train:, :]
    y_train = y[:n_samples_train]
    y_test = y[n_samples_train:]

    print("--- Fitting the IsolationForest estimator...")
    model = IsolationForest(n_jobs=-1, random_state=random_state)
    tstart = time()
    model.fit(X_train)
    fit_time = time() - tstart
    tstart = time()

    scoring = -model.decision_function(X_test)  # the lower, the more abnormal

    print("--- Preparing the plot elements...")
    if with_decision_function_histograms:
        fig, ax = plt.subplots(3, sharex=True, sharey=True)
        bins = np.linspace(-0.5, 0.5, 200)
        ax[0].hist(scoring, bins, color="black")
        ax[0].set_title("Decision function for %s dataset" % dat)
        ax[1].hist(scoring[y_test == 0], bins, color="b", label="normal data")
        ax[1].legend(loc="lower right")
        ax[2].hist(scoring[y_test == 1], bins, color="r", label="outliers")
        ax[2].legend(loc="lower right")

    # Show ROC Curves
    predict_time = time() - tstart
    fpr, tpr, thresholds = roc_curve(y_test, scoring)
    auc_score = auc(fpr, tpr)
    label = "%s (AUC: %0.3f, train_time= %0.2fs, test_time= %0.2fs)" % (
        dat,
        auc_score,
        fit_time,
        predict_time,
    )
    # Print AUC score and train/test time:
    print(label)
    ax_roc.plot(fpr, tpr, lw=1, label=label)

ax_roc.set_xlim([-0.05, 1.05])
ax_roc.set_ylim([-0.05, 1.05])
ax_roc.set_xlabel("False Positive Rate")
ax_roc.set_ylabel("True Positive Rate")
ax_roc.set_title("Receiver operating characteristic (ROC) curves")
ax_roc.legend(loc="lower right")
fig_roc.tight_layout()
plt.show()
================================================
FILE: benchmarks/bench_isotonic.py
================================================
"""
Benchmarks of isotonic regression performance.
We generate a synthetic dataset of size 10^n, for n in [min, max], and
examine the time taken to run isotonic regression over the dataset.
The timings are then output to stdout, or visualized on a log-log scale
with matplotlib.
This allows the scaling of the algorithm with the problem size to be
visualized and understood.
"""
import numpy as np
import gc
from datetime import datetime
from sklearn.isotonic import isotonic_regression
from scipy.special import expit
import matplotlib.pyplot as plt
import argparse
def generate_perturbed_logarithm_dataset(size):
return np.random.randint(-50, 50, size=size) + 50.0 * np.log(1 + np.arange(size))
def generate_logistic_dataset(size):
X = np.sort(np.random.normal(size=size))
return np.random.random(size=size) < expit(X)
def generate_pathological_dataset(size):
    """Dataset that triggers O(n^2) complexity on the original implementation."""
    parts = (
        np.arange(size),
        np.arange(-(size - 1), size),
        np.arange(-(size - 1), 1),
    )
    return np.concatenate(parts)
# Maps the --dataset CLI choice to its generator function.
DATASET_GENERATORS = {
    "perturbed_logarithm": generate_perturbed_logarithm_dataset,
    "logistic": generate_logistic_dataset,
    "pathological": generate_pathological_dataset,
}
def bench_isotonic_regression(Y):
    """Time a single isotonic_regression run on ``Y``.

    Returns the elapsed wall-clock time in seconds (float).
    """
    # Collect garbage first so allocator churn does not pollute the timing.
    gc.collect()
    start = datetime.now()
    isotonic_regression(Y)
    elapsed = datetime.now() - start
    return elapsed.total_seconds()
if __name__ == "__main__":
    # Command-line driver: time isotonic regression over a range of problem
    # sizes 10**k and either print or plot the average timings.
    parser = argparse.ArgumentParser(description="Isotonic Regression benchmark tool")
    parser.add_argument("--seed", type=int, help="RNG seed")
    parser.add_argument(
        "--iterations",
        type=int,
        required=True,
        help="Number of iterations to average timings over for each problem size",
    )
    parser.add_argument(
        "--log_min_problem_size",
        type=int,
        required=True,
        help="Base 10 logarithm of the minimum problem size",
    )
    parser.add_argument(
        "--log_max_problem_size",
        type=int,
        required=True,
        help="Base 10 logarithm of the maximum problem size",
    )
    parser.add_argument(
        "--show_plot", action="store_true", help="Plot timing output with matplotlib"
    )
    parser.add_argument("--dataset", choices=DATASET_GENERATORS.keys(), required=True)
    args = parser.parse_args()

    np.random.seed(args.seed)
    timings = []
    # NOTE(review): range() excludes its upper bound, so the largest problem
    # size actually run is 10 ** (log_max_problem_size - 1), even though the
    # module docstring says "for n in [min, max]" — confirm this is intended.
    for exponent in range(args.log_min_problem_size, args.log_max_problem_size):
        n = 10 ** exponent
        Y = DATASET_GENERATORS[args.dataset](n)
        # Average the timing over --iterations runs on the same data.
        time_per_iteration = [
            bench_isotonic_regression(Y) for i in range(args.iterations)
        ]
        timing = (n, np.mean(time_per_iteration))
        timings.append(timing)
        # If we're not plotting, dump the timing to stdout
        if not args.show_plot:
            print(n, np.mean(time_per_iteration))
    if args.show_plot:
        # Log-log plot so the algorithm's scaling exponent shows as a slope.
        plt.plot(*zip(*timings))
        plt.title("Average time taken running isotonic regression")
        plt.xlabel("Number of observations")
        plt.ylabel("Time (s)")
        plt.axis("tight")
        plt.loglog()
        plt.show()
================================================
FILE: benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py
================================================
"""
=============================================================
Kernel PCA Solvers comparison benchmark: time vs n_components
=============================================================
This benchmark shows that the approximate solvers provided in Kernel PCA can
help significantly improve its execution speed when an approximate solution
(small `n_components`) is acceptable. In many real-world datasets a few
hundreds of principal components are indeed sufficient enough to capture the
underlying distribution.
Description:
------------
A fixed number of training (default: 2000) and test (default: 1000) samples
with 2 features is generated using the `make_circles` helper method.
KernelPCA models are trained on the training set with an increasing number of
principal components, between 1 and `max_n_compo` (default: 1999), with
`n_compo_grid_size` positions (default: 10). For each value of `n_components`
to try, KernelPCA models are trained for the various possible `eigen_solver`
values. The execution times are displayed in a plot at the end of the
experiment.
What you can observe:
---------------------
When the number of requested principal components is small, the dense solver
takes more time to complete, while the randomized method returns similar
results with shorter execution times.
Going further:
--------------
You can adjust `max_n_compo` and `n_compo_grid_size` if you wish to explore a
different range of values for `n_components`.
You can also set `arpack_all=True` to activate arpack solver for large number
of components (this takes more time).
"""
# Authors: Sylvain MARIE, Schneider Electric
import time
import numpy as np
import matplotlib.pyplot as plt
from numpy.testing import assert_array_almost_equal
from sklearn.decomposition import KernelPCA
from sklearn.datasets import make_circles
print(__doc__)
# 1- Design the Experiment
# ------------------------
n_train, n_test = 2000, 1000 # the sample sizes to use
max_n_compo = 1999 # max n_components to try
n_compo_grid_size = 10 # nb of positions in the grid to try
# generate the grid
n_compo_range = [
np.round(np.exp((x / (n_compo_grid_size - 1)) * np.log(max_n_compo)))
for x in range(0, n_compo_grid_size)
]
n_iter = 3 # the number of times each experiment will be repeated
arpack_all = False # set to True if you wish to run arpack for all n_compo
# 2- Generate random data
# -----------------------
n_features = 2
X, y = make_circles(
n_samples=(n_train + n_test), factor=0.3, noise=0.05, random_state=0
)
X_train, X_test = X[:n_train, :], X[n_train:, :]
# 3- Benchmark
# ------------
# init
ref_time = np.empty((len(n_compo_range), n_iter)) * np.nan
a_time = np.empty((len(n_compo_range), n_iter)) * np.nan
r_time = np.empty((len(n_compo_range), n_iter)) * np.nan
# loop
for j, n_components in enumerate(n_compo_range):
n_components = int(n_components)
print("Performing kPCA with n_components = %i" % n_components)
# A- reference (dense)
print(" - dense solver")
for i in range(n_iter):
start_time = time.perf_counter()
ref_pred = (
KernelPCA(n_components, eigen_solver="dense").fit(X_train).transform(X_test)
)
ref_time[j, i] = time.perf_counter() - start_time
# B- arpack (for small number of components only, too slow otherwise)
if arpack_all or n_components < 100:
print(" - arpack solver")
for i in range(n_iter):
start_time = time.perf_counter()
a_pred = (
KernelPCA(n_components, eigen_solver="arpack")
.fit(X_train)
.transform(X_test)
)
a_time[j, i] = time.perf_counter() - start_time
# check that the result is still correct despite the approx
assert_array_almost_equal(np.abs(a_pred), np.abs(ref_pred))
# C- randomized
print(" - randomized solver")
for i in range(n_iter):
start_time = time.perf_counter()
r_pred = (
KernelPCA(n_components, eigen_solver="randomized")
.fit(X_train)
.transform(X_test)
)
r_time[j, i] = time.perf_counter() - start_time
# check that the result is still correct despite the approximation
assert_array_almost_equal(np.abs(r_pred), np.abs(ref_pred))
# Compute statistics for the 3 methods
avg_ref_time = ref_time.mean(axis=1)
std_ref_time = ref_time.std(axis=1)
avg_a_time = a_time.mean(axis=1)
std_a_time = a_time.std(axis=1)
avg_r_time = r_time.mean(axis=1)
std_r_time = r_time.std(axis=1)
# 4- Plots
# --------
fig, ax = plt.subplots(figsize=(12, 8))
# Display 1 plot with error bars per method
ax.errorbar(
n_compo_range,
avg_ref_time,
yerr=std_ref_time,
marker="x",
linestyle="",
color="r",
label="full",
)
ax.errorbar(
n_compo_range,
avg_a_time,
yerr=std_a_time,
marker="x",
linestyle="",
color="g",
label="arpack",
)
ax.errorbar(
n_compo_range,
avg_r_time,
yerr=std_r_time,
marker="x",
linestyle="",
color="b",
label="randomized",
)
ax.legend(loc="upper left")
# customize axes
ax.set_xscale("log")
ax.set_xlim(1, max(n_compo_range) * 1.1)
ax.set_ylabel("Execution time (s)")
ax.set_xlabel("n_components")
ax.set_title(
"kPCA Execution time comparison on %i samples with %i "
"features, according to the choice of `eigen_solver`"
"" % (n_train, n_features)
)
plt.show()
================================================
FILE: benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py
================================================
"""
==========================================================
Kernel PCA Solvers comparison benchmark: time vs n_samples
==========================================================
This benchmark shows that the approximate solvers provided in Kernel PCA can
help significantly improve its execution speed when an approximate solution
(small `n_components`) is acceptable. In many real-world datasets the number of
samples is very large, but a few hundreds of principal components are
sufficient enough to capture the underlying distribution.
Description:
------------
An increasing number of examples is used to train a KernelPCA, between
`min_n_samples` (default: 101) and `max_n_samples` (default: 4000) with
`n_samples_grid_size` positions (default: 4). Samples have 2 features, and are
generated using `make_circles`. For each training sample size, KernelPCA models
are trained for the various possible `eigen_solver` values. All of them are
trained to obtain `n_components` principal components (default: 100). The
execution times are displayed in a plot at the end of the experiment.
What you can observe:
---------------------
When the number of samples provided gets large, the dense solver takes a lot
of time to complete, while the randomized method returns similar results in
much shorter execution times.
Going further:
--------------
You can increase `max_n_samples` and `nb_n_samples_to_try` if you wish to
explore a wider range of values for `n_samples`.
You can also set `include_arpack=True` to add this other solver in the
experiments (much slower).
Finally you can have a look at the second example of this series, "Kernel PCA
Solvers comparison benchmark: time vs n_components", where this time the number
of examples is fixed, and the desired number of components varies.
"""
# Author: Sylvain MARIE, Schneider Electric
import time
import numpy as np
import matplotlib.pyplot as plt
from numpy.testing import assert_array_almost_equal
from sklearn.decomposition import KernelPCA
from sklearn.datasets import make_circles
print(__doc__)
# 1- Design the Experiment
# ------------------------
min_n_samples, max_n_samples = 101, 4000 # min and max n_samples to try
n_samples_grid_size = 4 # nb of positions in the grid to try
# generate the grid
n_samples_range = [
min_n_samples
+ np.floor((x / (n_samples_grid_size - 1)) * (max_n_samples - min_n_samples))
for x in range(0, n_samples_grid_size)
]
n_components = 100 # the number of principal components we want to use
n_iter = 3 # the number of times each experiment will be repeated
include_arpack = False # set this to True to include arpack solver (slower)
# 2- Generate random data
# -----------------------
n_features = 2
X, y = make_circles(n_samples=max_n_samples, factor=0.3, noise=0.05, random_state=0)
# 3- Benchmark
# ------------
# init
ref_time = np.empty((len(n_samples_range), n_iter)) * np.nan
a_time = np.empty((len(n_samples_range), n_iter)) * np.nan
r_time = np.empty((len(n_samples_range), n_iter)) * np.nan
# loop
for j, n_samples in enumerate(n_samples_range):
n_samples = int(n_samples)
print("Performing kPCA with n_samples = %i" % n_samples)
X_train = X[:n_samples, :]
X_test = X_train
# A- reference (dense)
print(" - dense")
for i in range(n_iter):
start_time = time.perf_counter()
ref_pred = (
KernelPCA(n_components, eigen_solver="dense").fit(X_train).transform(X_test)
)
ref_time[j, i] = time.perf_counter() - start_time
# B- arpack
if include_arpack:
print(" - arpack")
for i in range(n_iter):
start_time = time.perf_counter()
a_pred = (
KernelPCA(n_components, eigen_solver="arpack")
.fit(X_train)
.transform(X_test)
)
a_time[j, i] = time.perf_counter() - start_time
# check that the result is still correct despite the approx
assert_array_almost_equal(np.abs(a_pred), np.abs(ref_pred))
# C- randomized
print(" - randomized")
for i in range(n_iter):
start_time = time.perf_counter()
r_pred = (
KernelPCA(n_components, eigen_solver="randomized")
.fit(X_train)
.transform(X_test)
)
r_time[j, i] = time.perf_counter() - start_time
# check that the result is still correct despite the approximation
assert_array_almost_equal(np.abs(r_pred), np.abs(ref_pred))
# Compute statistics for the 3 methods
avg_ref_time = ref_time.mean(axis=1)
std_ref_time = ref_time.std(axis=1)
avg_a_time = a_time.mean(axis=1)
std_a_time = a_time.std(axis=1)
avg_r_time = r_time.mean(axis=1)
std_r_time = r_time.std(axis=1)
# 4- Plots
# --------
fig, ax = plt.subplots(figsize=(12, 8))
# Display 1 plot with error bars per method
ax.errorbar(
n_samples_range,
avg_ref_time,
yerr=std_ref_time,
marker="x",
linestyle="",
color="r",
label="full",
)
if include_arpack:
ax.errorbar(
n_samples_range,
avg_a_time,
yerr=std_a_time,
marker="x",
linestyle="",
color="g",
label="arpack",
)
ax.errorbar(
n_samples_range,
avg_r_time,
yerr=std_r_time,
marker="x",
linestyle="",
color="b",
label="randomized",
)
ax.legend(loc="upper left")
# customize axes
ax.set_xlim(min(n_samples_range) * 0.9, max(n_samples_range) * 1.1)
ax.set_ylabel("Execution time (s)")
ax.set_xlabel("n_samples")
ax.set_title(
"Execution time comparison of kPCA with %i components on samples "
"with %i features, according to the choice of `eigen_solver`"
"" % (n_components, n_features)
)
plt.show()
================================================
FILE: benchmarks/bench_lasso.py
================================================
"""
Benchmarks of Lasso vs LassoLars
First, we fix a training set and increase the number of
samples. Then we plot the computation time as function of
the number of samples.
In the second benchmark, we increase the number of dimensions of the
training set. Then we plot the computation time as function of
the number of dimensions.
In both cases, only 10% of the features are informative.
"""
import gc
from time import time
import numpy as np
from sklearn.datasets import make_regression
def compute_bench(alpha, n_samples, n_features, precompute):
    """Benchmark Lasso vs LassoLars fit times over a grid of problem sizes.

    Parameters
    ----------
    alpha : float
        Regularization strength passed to both estimators.
    n_samples : sequence of int
        Sample counts to benchmark.
    n_features : sequence of int
        Feature counts to benchmark.
    precompute : bool
        Whether the estimators should precompute the Gram matrix.

    Returns
    -------
    (lasso_results, lars_lasso_results) : pair of lists of float
        Fit times in seconds, one entry per (ns, nf) combination, in loop
        order (n_samples outer, n_features inner).

    NOTE(review): relies on ``Lasso`` and ``LassoLars`` being bound at module
    level by the caller (done in the ``__main__`` block of this script).
    """
    lasso_results = []
    lars_lasso_results = []
    it = 0
    # Total grid points is the product of the two grid lengths. The previous
    # code printed max(len(n_samples), len(n_features)), which undercounts
    # the total whenever both grids have more than one entry.
    n_total = len(n_samples) * len(n_features)
    for ns in n_samples:
        for nf in n_features:
            it += 1
            print("==================")
            print("Iteration %s of %s" % (it, n_total))
            print("==================")
            # 10% of the features carry signal (see module docstring).
            n_informative = nf // 10
            X, Y, coef_ = make_regression(
                n_samples=ns,
                n_features=nf,
                n_informative=n_informative,
                noise=0.1,
                coef=True,
            )
            X /= np.sqrt(np.sum(X ** 2, axis=0))  # Normalize data
            gc.collect()
            print("- benchmarking Lasso")
            clf = Lasso(alpha=alpha, fit_intercept=False, precompute=precompute)
            tstart = time()
            clf.fit(X, Y)
            lasso_results.append(time() - tstart)
            gc.collect()
            print("- benchmarking LassoLars")
            clf = LassoLars(
                alpha=alpha, fit_intercept=False, normalize=False, precompute=precompute
            )
            tstart = time()
            clf.fit(X, Y)
            lars_lasso_results.append(time() - tstart)
    return lasso_results, lars_lasso_results
if __name__ == "__main__":
    from sklearn.linear_model import Lasso, LassoLars
    import matplotlib.pyplot as plt

    alpha = 0.01  # regularization parameter
    # Benchmark 1: fixed feature count, growing sample count, with a
    # precomputed Gram matrix.
    n_features = 10
    list_n_samples = np.linspace(100, 1000000, 5).astype(int)
    lasso_results, lars_lasso_results = compute_bench(
        alpha, list_n_samples, [n_features], precompute=True
    )
    plt.figure("scikit-learn LASSO benchmark results")
    plt.subplot(211)
    plt.plot(list_n_samples, lasso_results, "b-", label="Lasso")
    plt.plot(list_n_samples, lars_lasso_results, "r-", label="LassoLars")
    plt.title("precomputed Gram matrix, %d features, alpha=%s" % (n_features, alpha))
    plt.legend(loc="upper left")
    plt.xlabel("number of samples")
    plt.ylabel("Time (s)")
    plt.axis("tight")
    # Benchmark 2: fixed sample count, growing feature count, without a
    # precomputed Gram matrix.
    n_samples = 2000
    list_n_features = np.linspace(500, 3000, 5).astype(int)
    lasso_results, lars_lasso_results = compute_bench(
        alpha, [n_samples], list_n_features, precompute=False
    )
    plt.subplot(212)
    plt.plot(list_n_features, lasso_results, "b-", label="Lasso")
    plt.plot(list_n_features, lars_lasso_results, "r-", label="LassoLars")
    plt.title("%d samples, alpha=%s" % (n_samples, alpha))
    plt.legend(loc="upper left")
    plt.xlabel("number of features")
    plt.ylabel("Time (s)")
    plt.axis("tight")
    plt.show()
================================================
FILE: benchmarks/bench_lof.py
================================================
"""
============================
LocalOutlierFactor benchmark
============================
A test of LocalOutlierFactor on classical anomaly detection datasets.
Note that LocalOutlierFactor is not meant to predict on a test set and its
performance is assessed in an outlier detection context:
1. The model is trained on the whole dataset which is assumed to contain
outliers.
2. The ROC curve is computed on the same dataset using the knowledge of the
labels.
In this context there is no need to shuffle the dataset because the model
is trained and tested on the whole dataset. The randomness of this benchmark
is only caused by the random selection of anomalies in the SA dataset.
"""
from time import time
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import LocalOutlierFactor
from sklearn.metrics import roc_curve, auc
from sklearn.datasets import fetch_kddcup99, fetch_covtype, fetch_openml
from sklearn.preprocessing import LabelBinarizer
print(__doc__)
random_state = 2 # to control the random selection of anomalies in SA
# datasets available: ['http', 'smtp', 'SA', 'SF', 'shuttle', 'forestcover']
datasets = ["http", "smtp", "SA", "SF", "shuttle", "forestcover"]
plt.figure()
for dataset_name in datasets:
# loading and vectorization
print("loading data")
if dataset_name in ["http", "smtp", "SA", "SF"]:
dataset = fetch_kddcup99(
subset=dataset_name, percent10=True, random_state=random_state
)
X = dataset.data
y = dataset.target
if dataset_name == "shuttle":
dataset = fetch_openml("shuttle")
X = dataset.data
y = dataset.target
# we remove data with label 4
# normal data are then those of class 1
s = y != 4
X = X[s, :]
y = y[s]
y = (y != 1).astype(int)
if dataset_name == "forestcover":
dataset = fetch_covtype()
X = dataset.data
y = dataset.target
# normal data are those with attribute 2
# abnormal those with attribute 4
s = (y == 2) + (y == 4)
X = X[s, :]
y = y[s]
y = (y != 2).astype(int)
print("vectorizing data")
if dataset_name == "SF":
lb = LabelBinarizer()
x1 = lb.fit_transform(X[:, 1].astype(str))
X = np.c_[X[:, :1], x1, X[:, 2:]]
y = (y != b"normal.").astype(int)
if dataset_name == "SA":
lb = LabelBinarizer()
x1 = lb.fit_transform(X[:, 1].astype(str))
x2 = lb.fit_transform(X[:, 2].astype(str))
x3 = lb.fit_transform(X[:, 3].astype(str))
X = np.c_[X[:, :1], x1, x2, x3, X[:, 4:]]
y = (y != b"normal.").astype(int)
if dataset_name == "http" or dataset_name == "smtp":
y = (y != b"normal.").astype(int)
X = X.astype(float)
print("LocalOutlierFactor processing...")
model = LocalOutlierFactor(n_neighbors=20)
tstart = time()
model.fit(X)
fit_time = time() - tstart
scoring = -model.negative_outlier_factor_ # the lower, the more normal
fpr, tpr, thresholds = roc_curve(y, scoring)
AUC = auc(fpr, tpr)
plt.plot(
fpr,
tpr,
lw=1,
label="ROC for %s (area = %0.3f, train-time: %0.2fs)"
% (dataset_name, AUC, fit_time),
)
plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("Receiver operating characteristic")
plt.legend(loc="lower right")
plt.show()
================================================
FILE: benchmarks/bench_mnist.py
================================================
"""
=======================
MNIST dataset benchmark
=======================
Benchmark on the MNIST dataset. The dataset comprises 70,000 samples
and 784 features. Here, we consider the task of predicting
10 classes - digits from 0 to 9 from their raw images. By contrast to the
covertype dataset, the feature space is homogeneous.
Example of output :
[..]
Classification performance:
===========================
Classifier train-time test-time error-rate
------------------------------------------------------------
MLP_adam 53.46s 0.11s 0.0224
Nystroem-SVM 112.97s 0.92s 0.0228
MultilayerPerceptron 24.33s 0.14s 0.0287
ExtraTrees 42.99s 0.57s 0.0294
RandomForest 42.70s 0.49s 0.0318
SampledRBF-SVM 135.81s 0.56s 0.0486
LinearRegression-SAG 16.67s 0.06s 0.0824
CART 20.69s 0.02s 0.1219
dummy 0.00s 0.01s 0.8973
"""
# Author: Issam H. Laradji
# Arnaud Joly <arnaud.v.joly@gmail.com>
# License: BSD 3 clause
import os
from time import time
import argparse
import numpy as np
from joblib import Memory
from sklearn.datasets import fetch_openml
from sklearn.datasets import get_data_home
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.dummy import DummyClassifier
from sklearn.kernel_approximation import Nystroem
from sklearn.kernel_approximation import RBFSampler
from sklearn.metrics import zero_one_loss
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import check_array
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
# Memoize the data extraction and memory map the resulting
# train / test splits in readonly mode
memory = Memory(os.path.join(get_data_home(), "mnist_benchmark_data"), mmap_mode="r")
@memory.cache
def load_data(dtype=np.float32, order="F"):
    """Fetch MNIST, scale pixels to [0, 1], and return a cached 60k/10k split.

    Returns ``(X_train, X_test, y_train, y_test)``.
    """
    print("Loading dataset...")
    data = fetch_openml("mnist_784")
    X = check_array(data["data"], dtype=dtype, order=order)
    y = data["target"]
    # Scale raw pixel values from [0, 255] down to [0, 1].
    X = X / 255
    # Fixed split (as [Joachims, 2006]): first 60000 rows train, rest test.
    print("Creating train-test split...")
    n_train = 60000
    return X[:n_train], X[n_train:], y[:n_train], y[n_train:]
# Registry of benchmarked models, keyed by the names accepted by the
# --classifiers command-line flag.
ESTIMATORS = {
    "dummy": DummyClassifier(),
    "CART": DecisionTreeClassifier(),
    "ExtraTrees": ExtraTreesClassifier(),
    "RandomForest": RandomForestClassifier(),
    # Kernel approximation (1000 components) feeding a linear SVM.
    "Nystroem-SVM": make_pipeline(
        Nystroem(gamma=0.015, n_components=1000), LinearSVC(C=100)
    ),
    "SampledRBF-SVM": make_pipeline(
        RBFSampler(gamma=0.015, n_components=1000), LinearSVC(C=100)
    ),
    # Loose tolerance (tol=1e-1) keeps the linear solvers fast on MNIST.
    "LogisticRegression-SAG": LogisticRegression(solver="sag", tol=1e-1, C=1e4),
    "LogisticRegression-SAGA": LogisticRegression(solver="saga", tol=1e-1, C=1e4),
    "MultilayerPerceptron": MLPClassifier(
        hidden_layer_sizes=(100, 100),
        max_iter=400,
        alpha=1e-4,
        solver="sgd",
        learning_rate_init=0.2,
        momentum=0.9,
        verbose=1,
        tol=1e-4,
        random_state=1,
    ),
    "MLP-adam": MLPClassifier(
        hidden_layer_sizes=(100, 100),
        max_iter=400,
        alpha=1e-4,
        solver="adam",
        learning_rate_init=0.001,
        verbose=1,
        tol=1e-4,
        random_state=1,
    ),
}
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--classifiers",
nargs="+",
choices=ESTIMATORS,
type=str,
default=["ExtraTrees", "Nystroem-SVM"],
help="list of classifiers to benchmark.",
)
parser.add_argument(
"--n-jobs",
nargs="?",
default=1,
type=int,
help=(
"Number of concurrently running workers for "
"models that support parallelism."
),
)
parser.add_argument(
"--order",
nargs="?",
default="C",
type=str,
choices=["F", "C"],
help="Allow to choose between fortran and C ordered data",
)
parser.add_argument(
"--random-seed",
nargs="?",
default=0,
type=int,
help="Common seed used by random number generator.",
)
args = vars(parser.parse_args())
print(__doc__)
X_train, X_test, y_train, y_test = load_data(order=args["order"])
print("")
print("Dataset statistics:")
print("===================")
print("%s %d" % ("number of features:".ljust(25), X_train.shape[1]))
print("%s %d" % ("number of classes:".ljust(25), np.unique(y_train).size))
print("%s %s" % ("data type:".ljust(25), X_train.dtype))
print(
"%s %d (size=%dMB)"
% (
"number of train samples:".ljust(25),
X_train.shape[0],
int(X_train.nbytes / 1e6),
)
)
print(
"%s %d (size=%dMB)"
% (
"number of test samples:".ljust(25),
X_test.shape[0],
int(X_test.nbytes / 1e6),
)
)
print()
print("Training Classifiers")
print("====================")
error, train_time, test_time = {}, {}, {}
for name in sorted(args["classifiers"]):
print("Training %s ... " % name, end="")
estimator = ESTIMATORS[name]
estimator_params = estimator.get_params()
estimator.set_params(
**{
p: args["random_seed"]
for p in estimator_params
if p.endswith("rand
gitextract_8esimy8a/
├── .binder/
│ ├── postBuild
│ └── requirements.txt
├── .circleci/
│ ├── artifact_path
│ └── config.yml
├── .codecov.yml
├── .coveragerc
├── .git-blame-ignore-revs
├── .gitattributes
├── .github/
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.yml
│ │ ├── config.yml
│ │ ├── doc_improvement.yml
│ │ └── feature_request.yml
│ ├── PULL_REQUEST_TEMPLATE.md
│ ├── labeler-file-extensions.yml
│ ├── labeler-module.yml
│ ├── scripts/
│ │ └── label_title_regex.py
│ └── workflows/
│ ├── assign.yml
│ ├── check-changelog.yml
│ ├── check-manifest.yml
│ ├── labeler-module.yml
│ ├── labeler-title-regex.yml
│ ├── publish_pypi.yml
│ ├── twitter.yml
│ ├── unassign.yml
│ └── wheels.yml
├── .gitignore
├── .mailmap
├── .pre-commit-config.yaml
├── .travis.yml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── COPYING
├── MANIFEST.in
├── Makefile
├── README.rst
├── SECURITY.md
├── asv_benchmarks/
│ ├── .gitignore
│ ├── asv.conf.json
│ └── benchmarks/
│ ├── __init__.py
│ ├── cluster.py
│ ├── common.py
│ ├── config.json
│ ├── datasets.py
│ ├── decomposition.py
│ ├── ensemble.py
│ ├── linear_model.py
│ ├── manifold.py
│ ├── metrics.py
│ ├── model_selection.py
│ ├── neighbors.py
│ ├── svm.py
│ └── utils.py
├── azure-pipelines.yml
├── benchmarks/
│ ├── .gitignore
│ ├── bench_20newsgroups.py
│ ├── bench_covertype.py
│ ├── bench_feature_expansions.py
│ ├── bench_glm.py
│ ├── bench_glmnet.py
│ ├── bench_hist_gradient_boosting.py
│ ├── bench_hist_gradient_boosting_adult.py
│ ├── bench_hist_gradient_boosting_categorical_only.py
│ ├── bench_hist_gradient_boosting_higgsboson.py
│ ├── bench_hist_gradient_boosting_threading.py
│ ├── bench_isolation_forest.py
│ ├── bench_isotonic.py
│ ├── bench_kernel_pca_solvers_time_vs_n_components.py
│ ├── bench_kernel_pca_solvers_time_vs_n_samples.py
│ ├── bench_lasso.py
│ ├── bench_lof.py
│ ├── bench_mnist.py
│ ├── bench_multilabel_metrics.py
│ ├── bench_online_ocsvm.py
│ ├── bench_plot_fastkmeans.py
│ ├── bench_plot_hierarchical.py
│ ├── bench_plot_incremental_pca.py
│ ├── bench_plot_lasso_path.py
│ ├── bench_plot_neighbors.py
│ ├── bench_plot_nmf.py
│ ├── bench_plot_omp_lars.py
│ ├── bench_plot_parallel_pairwise.py
│ ├── bench_plot_polynomial_kernel_approximation.py
│ ├── bench_plot_randomized_svd.py
│ ├── bench_plot_svd.py
│ ├── bench_plot_ward.py
│ ├── bench_random_projections.py
│ ├── bench_rcv1_logreg_convergence.py
│ ├── bench_saga.py
│ ├── bench_sample_without_replacement.py
│ ├── bench_sgd_regression.py
│ ├── bench_sparsify.py
│ ├── bench_text_vectorizers.py
│ ├── bench_tree.py
│ ├── bench_tsne_mnist.py
│ └── plot_tsne_mnist.py
├── build_tools/
│ ├── Makefile
│ ├── azure/
│ │ ├── install.sh
│ │ ├── install_win.sh
│ │ ├── posix-docker.yml
│ │ ├── posix.yml
│ │ ├── test_docs.sh
│ │ ├── test_docstring.sh
│ │ ├── test_pytest_soft_dependency.sh
│ │ ├── test_script.sh
│ │ ├── upload_codecov.sh
│ │ └── windows.yml
│ ├── circle/
│ │ ├── build_doc.sh
│ │ ├── build_test_arm.sh
│ │ ├── build_test_pypy.sh
│ │ ├── checkout_merge_commit.sh
│ │ ├── linting.sh
│ │ ├── list_versions.py
│ │ └── push_doc.sh
│ ├── codespell_ignore_words.txt
│ ├── generate_authors_table.py
│ ├── github/
│ │ ├── Windows
│ │ ├── build_minimal_windows_image.sh
│ │ ├── build_source.sh
│ │ ├── build_wheels.sh
│ │ ├── check_build_trigger.sh
│ │ ├── check_wheels.py
│ │ ├── repair_windows_wheels.sh
│ │ ├── test_source.sh
│ │ ├── test_wheels.sh
│ │ ├── test_windows_wheels.sh
│ │ ├── upload_anaconda.sh
│ │ └── vendor.py
│ ├── shared.sh
│ └── travis/
│ ├── after_success.sh
│ ├── install.sh
│ ├── install_main.sh
│ ├── install_wheels.sh
│ ├── script.sh
│ ├── test_docs.sh
│ ├── test_script.sh
│ └── test_wheels.sh
├── conftest.py
├── doc/
│ ├── Makefile
│ ├── README.md
│ ├── about.rst
│ ├── authors.rst
│ ├── authors_emeritus.rst
│ ├── binder/
│ │ └── requirements.txt
│ ├── common_pitfalls.rst
│ ├── communication_team.rst
│ ├── computing/
│ │ ├── computational_performance.rst
│ │ ├── parallelism.rst
│ │ └── scaling_strategies.rst
│ ├── computing.rst
│ ├── conf.py
│ ├── conftest.py
│ ├── contents.rst
│ ├── data_transforms.rst
│ ├── datasets/
│ │ ├── loading_other_datasets.rst
│ │ ├── real_world.rst
│ │ ├── sample_generators.rst
│ │ └── toy_dataset.rst
│ ├── datasets.rst
│ ├── developers/
│ │ ├── advanced_installation.rst
│ │ ├── bug_triaging.rst
│ │ ├── contributing.rst
│ │ ├── develop.rst
│ │ ├── index.rst
│ │ ├── maintainer.rst
│ │ ├── performance.rst
│ │ ├── plotting.rst
│ │ ├── tips.rst
│ │ └── utilities.rst
│ ├── faq.rst
│ ├── getting_started.rst
│ ├── glossary.rst
│ ├── governance.rst
│ ├── includes/
│ │ ├── big_toc_css.rst
│ │ └── bigger_toc_css.rst
│ ├── inspection.rst
│ ├── install.rst
│ ├── make.bat
│ ├── model_selection.rst
│ ├── modules/
│ │ ├── biclustering.rst
│ │ ├── calibration.rst
│ │ ├── classes.rst
│ │ ├── clustering.rst
│ │ ├── compose.rst
│ │ ├── covariance.rst
│ │ ├── cross_decomposition.rst
│ │ ├── cross_validation.rst
│ │ ├── decomposition.rst
│ │ ├── density.rst
│ │ ├── ensemble.rst
│ │ ├── feature_extraction.rst
│ │ ├── feature_selection.rst
│ │ ├── gaussian_process.rst
│ │ ├── grid_search.rst
│ │ ├── impute.rst
│ │ ├── isotonic.rst
│ │ ├── kernel_approximation.rst
│ │ ├── kernel_ridge.rst
│ │ ├── lda_qda.rst
│ │ ├── learning_curve.rst
│ │ ├── linear_model.rst
│ │ ├── manifold.rst
│ │ ├── metrics.rst
│ │ ├── mixture.rst
│ │ ├── model_evaluation.rst
│ │ ├── model_persistence.rst
│ │ ├── multiclass.rst
│ │ ├── naive_bayes.rst
│ │ ├── neighbors.rst
│ │ ├── neural_networks_supervised.rst
│ │ ├── neural_networks_unsupervised.rst
│ │ ├── outlier_detection.rst
│ │ ├── partial_dependence.rst
│ │ ├── permutation_importance.rst
│ │ ├── pipeline.rst
│ │ ├── preprocessing.rst
│ │ ├── preprocessing_targets.rst
│ │ ├── random_projection.rst
│ │ ├── semi_supervised.rst
│ │ ├── sgd.rst
│ │ ├── svm.rst
│ │ ├── tree.rst
│ │ └── unsupervised_reduction.rst
│ ├── preface.rst
│ ├── presentations.rst
│ ├── related_projects.rst
│ ├── roadmap.rst
│ ├── sphinxext/
│ │ ├── MANIFEST.in
│ │ ├── add_toctree_functions.py
│ │ ├── custom_references_resolver.py
│ │ ├── doi_role.py
│ │ ├── github_link.py
│ │ └── sphinx_issues.py
│ ├── supervised_learning.rst
│ ├── support.rst
│ ├── templates/
│ │ ├── class.rst
│ │ ├── class_with_call.rst
│ │ ├── deprecated_class.rst
│ │ ├── deprecated_class_with_call.rst
│ │ ├── deprecated_class_without_init.rst
│ │ ├── deprecated_function.rst
│ │ ├── function.rst
│ │ ├── generate_deprecated.sh
│ │ ├── index.html
│ │ ├── numpydoc_docstring.rst
│ │ └── redirects.html
│ ├── testimonials/
│ │ ├── README.txt
│ │ ├── images/
│ │ │ └── Makefile
│ │ └── testimonials.rst
│ ├── themes/
│ │ └── scikit-learn-modern/
│ │ ├── javascript.html
│ │ ├── layout.html
│ │ ├── nav.html
│ │ ├── search.html
│ │ ├── static/
│ │ │ ├── css/
│ │ │ │ └── theme.css
│ │ │ └── js/
│ │ │ └── searchtools.js
│ │ └── theme.conf
│ ├── triage_team.rst
│ ├── tune_toc.rst
│ ├── tutorial/
│ │ ├── basic/
│ │ │ └── tutorial.rst
│ │ ├── common_includes/
│ │ │ └── info.txt
│ │ ├── index.rst
│ │ ├── machine_learning_map/
│ │ │ ├── ML_MAPS_README.txt
│ │ │ ├── index.rst
│ │ │ ├── parse_path.py
│ │ │ ├── pyparsing.py
│ │ │ └── svg2imagemap.py
│ │ ├── statistical_inference/
│ │ │ ├── index.rst
│ │ │ ├── model_selection.rst
│ │ │ ├── putting_together.rst
│ │ │ ├── settings.rst
│ │ │ ├── supervised_learning.rst
│ │ │ └── unsupervised_learning.rst
│ │ └── text_analytics/
│ │ ├── .gitignore
│ │ ├── data/
│ │ │ ├── languages/
│ │ │ │ └── fetch_data.py
│ │ │ ├── movie_reviews/
│ │ │ │ └── fetch_data.py
│ │ │ └── twenty_newsgroups/
│ │ │ └── fetch_data.py
│ │ ├── skeletons/
│ │ │ ├── exercise_01_language_train_model.py
│ │ │ └── exercise_02_sentiment.py
│ │ ├── solutions/
│ │ │ ├── exercise_01_language_train_model.py
│ │ │ ├── exercise_02_sentiment.py
│ │ │ └── generate_skeletons.py
│ │ └── working_with_text_data.rst
│ ├── unsupervised_learning.rst
│ ├── user_guide.rst
│ ├── visualizations.rst
│ ├── whats_new/
│ │ ├── _contributors.rst
│ │ ├── changelog_legend.inc
│ │ ├── older_versions.rst
│ │ ├── v0.13.rst
│ │ ├── v0.14.rst
│ │ ├── v0.15.rst
│ │ ├── v0.16.rst
│ │ ├── v0.17.rst
│ │ ├── v0.18.rst
│ │ ├── v0.19.rst
│ │ ├── v0.20.rst
│ │ ├── v0.21.rst
│ │ ├── v0.22.rst
│ │ ├── v0.23.rst
│ │ ├── v0.24.rst
│ │ ├── v1.0.rst
│ │ └── v1.1.rst
│ └── whats_new.rst
├── examples/
│ ├── README.txt
│ ├── applications/
│ │ ├── README.txt
│ │ ├── plot_cyclical_feature_engineering.py
│ │ ├── plot_digits_denoising.py
│ │ ├── plot_face_recognition.py
│ │ ├── plot_model_complexity_influence.py
│ │ ├── plot_out_of_core_classification.py
│ │ ├── plot_outlier_detection_wine.py
│ │ ├── plot_prediction_latency.py
│ │ ├── plot_species_distribution_modeling.py
│ │ ├── plot_stock_market.py
│ │ ├── plot_tomography_l1_reconstruction.py
│ │ ├── plot_topics_extraction_with_nmf_lda.py
│ │ ├── svm_gui.py
│ │ └── wikipedia_principal_eigenvector.py
│ ├── bicluster/
│ │ ├── README.txt
│ │ ├── plot_bicluster_newsgroups.py
│ │ ├── plot_spectral_biclustering.py
│ │ └── plot_spectral_coclustering.py
│ ├── calibration/
│ │ ├── README.txt
│ │ ├── plot_calibration.py
│ │ ├── plot_calibration_curve.py
│ │ ├── plot_calibration_multiclass.py
│ │ └── plot_compare_calibration.py
│ ├── classification/
│ │ ├── README.txt
│ │ ├── plot_classification_probability.py
│ │ ├── plot_classifier_comparison.py
│ │ ├── plot_digits_classification.py
│ │ ├── plot_lda.py
│ │ └── plot_lda_qda.py
│ ├── cluster/
│ │ ├── README.txt
│ │ ├── plot_adjusted_for_chance_measures.py
│ │ ├── plot_affinity_propagation.py
│ │ ├── plot_agglomerative_clustering.py
│ │ ├── plot_agglomerative_clustering_metrics.py
│ │ ├── plot_agglomerative_dendrogram.py
│ │ ├── plot_birch_vs_minibatchkmeans.py
│ │ ├── plot_cluster_comparison.py
│ │ ├── plot_cluster_iris.py
│ │ ├── plot_coin_segmentation.py
│ │ ├── plot_coin_ward_segmentation.py
│ │ ├── plot_color_quantization.py
│ │ ├── plot_dbscan.py
│ │ ├── plot_dict_face_patches.py
│ │ ├── plot_digits_agglomeration.py
│ │ ├── plot_digits_linkage.py
│ │ ├── plot_face_compress.py
│ │ ├── plot_feature_agglomeration_vs_univariate_selection.py
│ │ ├── plot_inductive_clustering.py
│ │ ├── plot_kmeans_assumptions.py
│ │ ├── plot_kmeans_digits.py
│ │ ├── plot_kmeans_plusplus.py
│ │ ├── plot_kmeans_silhouette_analysis.py
│ │ ├── plot_kmeans_stability_low_dim_dense.py
│ │ ├── plot_linkage_comparison.py
│ │ ├── plot_mean_shift.py
│ │ ├── plot_mini_batch_kmeans.py
│ │ ├── plot_optics.py
│ │ ├── plot_segmentation_toy.py
│ │ └── plot_ward_structured_vs_unstructured.py
│ ├── compose/
│ │ ├── README.txt
│ │ ├── plot_column_transformer.py
│ │ ├── plot_column_transformer_mixed_types.py
│ │ ├── plot_compare_reduction.py
│ │ ├── plot_digits_pipe.py
│ │ ├── plot_feature_union.py
│ │ └── plot_transformed_target.py
│ ├── covariance/
│ │ ├── README.txt
│ │ ├── plot_covariance_estimation.py
│ │ ├── plot_lw_vs_oas.py
│ │ ├── plot_mahalanobis_distances.py
│ │ ├── plot_robust_vs_empirical_covariance.py
│ │ └── plot_sparse_cov.py
│ ├── cross_decomposition/
│ │ ├── README.txt
│ │ ├── plot_compare_cross_decomposition.py
│ │ └── plot_pcr_vs_pls.py
│ ├── datasets/
│ │ ├── README.txt
│ │ ├── plot_digits_last_image.py
│ │ ├── plot_iris_dataset.py
│ │ ├── plot_random_dataset.py
│ │ └── plot_random_multilabel_dataset.py
│ ├── decomposition/
│ │ ├── README.txt
│ │ ├── plot_beta_divergence.py
│ │ ├── plot_faces_decomposition.py
│ │ ├── plot_ica_blind_source_separation.py
│ │ ├── plot_ica_vs_pca.py
│ │ ├── plot_image_denoising.py
│ │ ├── plot_incremental_pca.py
│ │ ├── plot_kernel_pca.py
│ │ ├── plot_pca_3d.py
│ │ ├── plot_pca_iris.py
│ │ ├── plot_pca_vs_fa_model_selection.py
│ │ ├── plot_pca_vs_lda.py
│ │ ├── plot_sparse_coding.py
│ │ └── plot_varimax_fa.py
│ ├── ensemble/
│ │ ├── README.txt
│ │ ├── plot_adaboost_hastie_10_2.py
│ │ ├── plot_adaboost_multiclass.py
│ │ ├── plot_adaboost_regression.py
│ │ ├── plot_adaboost_twoclass.py
│ │ ├── plot_bias_variance.py
│ │ ├── plot_ensemble_oob.py
│ │ ├── plot_feature_transformation.py
│ │ ├── plot_forest_importances.py
│ │ ├── plot_forest_importances_faces.py
│ │ ├── plot_forest_iris.py
│ │ ├── plot_gradient_boosting_categorical.py
│ │ ├── plot_gradient_boosting_early_stopping.py
│ │ ├── plot_gradient_boosting_oob.py
│ │ ├── plot_gradient_boosting_quantile.py
│ │ ├── plot_gradient_boosting_regression.py
│ │ ├── plot_gradient_boosting_regularization.py
│ │ ├── plot_isolation_forest.py
│ │ ├── plot_monotonic_constraints.py
│ │ ├── plot_random_forest_embedding.py
│ │ ├── plot_random_forest_regression_multioutput.py
│ │ ├── plot_stack_predictors.py
│ │ ├── plot_voting_decision_regions.py
│ │ ├── plot_voting_probas.py
│ │ └── plot_voting_regressor.py
│ ├── exercises/
│ │ ├── README.txt
│ │ ├── plot_cv_diabetes.py
│ │ ├── plot_cv_digits.py
│ │ ├── plot_digits_classification_exercise.py
│ │ └── plot_iris_exercise.py
│ ├── feature_selection/
│ │ ├── README.txt
│ │ ├── plot_f_test_vs_mi.py
│ │ ├── plot_feature_selection.py
│ │ ├── plot_feature_selection_pipeline.py
│ │ ├── plot_rfe_digits.py
│ │ ├── plot_rfe_with_cross_validation.py
│ │ └── plot_select_from_model_diabetes.py
│ ├── gaussian_process/
│ │ ├── README.txt
│ │ ├── plot_compare_gpr_krr.py
│ │ ├── plot_gpc.py
│ │ ├── plot_gpc_iris.py
│ │ ├── plot_gpc_isoprobability.py
│ │ ├── plot_gpc_xor.py
│ │ ├── plot_gpr_co2.py
│ │ ├── plot_gpr_noisy.py
│ │ ├── plot_gpr_noisy_targets.py
│ │ ├── plot_gpr_on_structured_data.py
│ │ └── plot_gpr_prior_posterior.py
│ ├── impute/
│ │ ├── README.txt
│ │ ├── plot_iterative_imputer_variants_comparison.py
│ │ └── plot_missing_values.py
│ ├── inspection/
│ │ ├── README.txt
│ │ ├── plot_linear_model_coefficient_interpretation.py
│ │ ├── plot_partial_dependence.py
│ │ ├── plot_permutation_importance.py
│ │ └── plot_permutation_importance_multicollinear.py
│ ├── kernel_approximation/
│ │ ├── README.txt
│ │ └── plot_scalable_poly_kernels.py
│ ├── linear_model/
│ │ ├── README.txt
│ │ ├── plot_ard.py
│ │ ├── plot_bayesian_ridge.py
│ │ ├── plot_bayesian_ridge_curvefit.py
│ │ ├── plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py
│ │ ├── plot_huber_vs_ridge.py
│ │ ├── plot_iris_logistic.py
│ │ ├── plot_lasso_and_elasticnet.py
│ │ ├── plot_lasso_coordinate_descent_path.py
│ │ ├── plot_lasso_dense_vs_sparse_data.py
│ │ ├── plot_lasso_lars.py
│ │ ├── plot_lasso_model_selection.py
│ │ ├── plot_logistic.py
│ │ ├── plot_logistic_l1_l2_sparsity.py
│ │ ├── plot_logistic_multinomial.py
│ │ ├── plot_logistic_path.py
│ │ ├── plot_multi_task_lasso_support.py
│ │ ├── plot_nnls.py
│ │ ├── plot_ols.py
│ │ ├── plot_ols_3d.py
│ │ ├── plot_ols_ridge_variance.py
│ │ ├── plot_omp.py
│ │ ├── plot_poisson_regression_non_normal_loss.py
│ │ ├── plot_polynomial_interpolation.py
│ │ ├── plot_quantile_regression.py
│ │ ├── plot_ransac.py
│ │ ├── plot_ridge_coeffs.py
│ │ ├── plot_ridge_path.py
│ │ ├── plot_robust_fit.py
│ │ ├── plot_sgd_comparison.py
│ │ ├── plot_sgd_early_stopping.py
│ │ ├── plot_sgd_iris.py
│ │ ├── plot_sgd_loss_functions.py
│ │ ├── plot_sgd_penalties.py
│ │ ├── plot_sgd_separating_hyperplane.py
│ │ ├── plot_sgd_weighted_samples.py
│ │ ├── plot_sgdocsvm_vs_ocsvm.py
│ │ ├── plot_sparse_logistic_regression_20newsgroups.py
│ │ ├── plot_sparse_logistic_regression_mnist.py
│ │ ├── plot_theilsen.py
│ │ └── plot_tweedie_regression_insurance_claims.py
│ ├── manifold/
│ │ ├── README.txt
│ │ ├── plot_compare_methods.py
│ │ ├── plot_lle_digits.py
│ │ ├── plot_manifold_sphere.py
│ │ ├── plot_mds.py
│ │ ├── plot_swissroll.py
│ │ └── plot_t_sne_perplexity.py
│ ├── miscellaneous/
│ │ ├── README.txt
│ │ ├── plot_anomaly_comparison.py
│ │ ├── plot_changed_only_pprint_parameter.py
│ │ ├── plot_display_object_visualization.py
│ │ ├── plot_isotonic_regression.py
│ │ ├── plot_johnson_lindenstrauss_bound.py
│ │ ├── plot_kernel_approximation.py
│ │ ├── plot_kernel_ridge_regression.py
│ │ ├── plot_multilabel.py
│ │ ├── plot_multioutput_face_completion.py
│ │ ├── plot_partial_dependence_visualization_api.py
│ │ ├── plot_pipeline_display.py
│ │ └── plot_roc_curve_visualization_api.py
│ ├── mixture/
│ │ ├── README.txt
│ │ ├── plot_concentration_prior.py
│ │ ├── plot_gmm.py
│ │ ├── plot_gmm_covariances.py
│ │ ├── plot_gmm_pdf.py
│ │ ├── plot_gmm_selection.py
│ │ └── plot_gmm_sin.py
│ ├── model_selection/
│ │ ├── README.txt
│ │ ├── grid_search_text_feature_extraction.py
│ │ ├── plot_confusion_matrix.py
│ │ ├── plot_cv_indices.py
│ │ ├── plot_cv_predict.py
│ │ ├── plot_det.py
│ │ ├── plot_grid_search_digits.py
│ │ ├── plot_grid_search_refit_callable.py
│ │ ├── plot_grid_search_stats.py
│ │ ├── plot_learning_curve.py
│ │ ├── plot_multi_metric_evaluation.py
│ │ ├── plot_nested_cross_validation_iris.py
│ │ ├── plot_permutation_tests_for_classification.py
│ │ ├── plot_precision_recall.py
│ │ ├── plot_randomized_search.py
│ │ ├── plot_roc.py
│ │ ├── plot_roc_crossval.py
│ │ ├── plot_successive_halving_heatmap.py
│ │ ├── plot_successive_halving_iterations.py
│ │ ├── plot_train_error_vs_test_error.py
│ │ ├── plot_underfitting_overfitting.py
│ │ └── plot_validation_curve.py
│ ├── multioutput/
│ │ ├── README.txt
│ │ └── plot_classifier_chain_yeast.py
│ ├── neighbors/
│ │ ├── README.txt
│ │ ├── approximate_nearest_neighbors.py
│ │ ├── plot_caching_nearest_neighbors.py
│ │ ├── plot_classification.py
│ │ ├── plot_digits_kde_sampling.py
│ │ ├── plot_kde_1d.py
│ │ ├── plot_lof_novelty_detection.py
│ │ ├── plot_lof_outlier_detection.py
│ │ ├── plot_nca_classification.py
│ │ ├── plot_nca_dim_reduction.py
│ │ ├── plot_nca_illustration.py
│ │ ├── plot_nearest_centroid.py
│ │ ├── plot_regression.py
│ │ └── plot_species_kde.py
│ ├── neural_networks/
│ │ ├── README.txt
│ │ ├── plot_mlp_alpha.py
│ │ ├── plot_mlp_training_curves.py
│ │ ├── plot_mnist_filters.py
│ │ └── plot_rbm_logistic_classification.py
│ ├── preprocessing/
│ │ ├── README.txt
│ │ ├── plot_all_scaling.py
│ │ ├── plot_discretization.py
│ │ ├── plot_discretization_classification.py
│ │ ├── plot_discretization_strategies.py
│ │ ├── plot_map_data_to_normal.py
│ │ └── plot_scaling_importance.py
│ ├── release_highlights/
│ │ ├── README.txt
│ │ ├── plot_release_highlights_0_22_0.py
│ │ ├── plot_release_highlights_0_23_0.py
│ │ ├── plot_release_highlights_0_24_0.py
│ │ └── plot_release_highlights_1_0_0.py
│ ├── semi_supervised/
│ │ ├── README.txt
│ │ ├── plot_label_propagation_digits.py
│ │ ├── plot_label_propagation_digits_active_learning.py
│ │ ├── plot_label_propagation_structure.py
│ │ ├── plot_self_training_varying_threshold.py
│ │ ├── plot_semi_supervised_newsgroups.py
│ │ └── plot_semi_supervised_versus_svm_iris.py
│ ├── svm/
│ │ ├── README.txt
│ │ ├── plot_custom_kernel.py
│ │ ├── plot_iris_svc.py
│ │ ├── plot_linearsvc_support_vectors.py
│ │ ├── plot_oneclass.py
│ │ ├── plot_rbf_parameters.py
│ │ ├── plot_separating_hyperplane.py
│ │ ├── plot_separating_hyperplane_unbalanced.py
│ │ ├── plot_svm_anova.py
│ │ ├── plot_svm_kernels.py
│ │ ├── plot_svm_margin.py
│ │ ├── plot_svm_nonlinear.py
│ │ ├── plot_svm_regression.py
│ │ ├── plot_svm_scale_c.py
│ │ ├── plot_svm_tie_breaking.py
│ │ └── plot_weighted_samples.py
│ ├── text/
│ │ ├── README.txt
│ │ ├── plot_document_classification_20newsgroups.py
│ │ ├── plot_document_clustering.py
│ │ └── plot_hashing_vs_dict_vectorizer.py
│ └── tree/
│ ├── README.txt
│ ├── plot_cost_complexity_pruning.py
│ ├── plot_iris_dtc.py
│ ├── plot_tree_regression.py
│ ├── plot_tree_regression_multioutput.py
│ └── plot_unveil_tree_structure.py
├── lgtm.yml
├── maint_tools/
│ ├── check_pxd_in_installation.py
│ ├── create_issue_from_juint.py
│ ├── sort_whats_new.py
│ ├── test_docstrings.py
│ └── whats_missing.sh
├── pyproject.toml
├── setup.cfg
├── setup.py
└── sklearn/
├── __check_build/
│ ├── __init__.py
│ ├── _check_build.pyx
│ └── setup.py
├── __init__.py
├── _build_utils/
│ ├── __init__.py
│ ├── openmp_helpers.py
│ └── pre_build_helpers.py
├── _config.py
├── _distributor_init.py
├── _isotonic.pyx
├── _loss/
│ ├── __init__.py
│ ├── glm_distribution.py
│ └── tests/
│ ├── __init__.py
│ └── test_glm_distribution.py
├── _min_dependencies.py
├── base.py
├── calibration.py
├── cluster/
│ ├── __init__.py
│ ├── _affinity_propagation.py
│ ├── _agglomerative.py
│ ├── _bicluster.py
│ ├── _birch.py
│ ├── _dbscan.py
│ ├── _dbscan_inner.pyx
│ ├── _feature_agglomeration.py
│ ├── _hierarchical_fast.pyx
│ ├── _k_means_common.pxd
│ ├── _k_means_common.pyx
│ ├── _k_means_elkan.pyx
│ ├── _k_means_lloyd.pyx
│ ├── _k_means_minibatch.pyx
│ ├── _kmeans.py
│ ├── _mean_shift.py
│ ├── _optics.py
│ ├── _spectral.py
│ ├── setup.py
│ └── tests/
│ ├── __init__.py
│ ├── common.py
│ ├── test_affinity_propagation.py
│ ├── test_bicluster.py
│ ├── test_birch.py
│ ├── test_dbscan.py
│ ├── test_feature_agglomeration.py
│ ├── test_hierarchical.py
│ ├── test_k_means.py
│ ├── test_mean_shift.py
│ ├── test_optics.py
│ └── test_spectral.py
├── compose/
│ ├── __init__.py
│ ├── _column_transformer.py
│ ├── _target.py
│ └── tests/
│ ├── __init__.py
│ ├── test_column_transformer.py
│ └── test_target.py
├── conftest.py
├── covariance/
│ ├── __init__.py
│ ├── _elliptic_envelope.py
│ ├── _empirical_covariance.py
│ ├── _graph_lasso.py
│ ├── _robust_covariance.py
│ ├── _shrunk_covariance.py
│ └── tests/
│ ├── __init__.py
│ ├── test_covariance.py
│ ├── test_elliptic_envelope.py
│ ├── test_graphical_lasso.py
│ └── test_robust_covariance.py
├── cross_decomposition/
│ ├── __init__.py
│ ├── _pls.py
│ └── tests/
│ ├── __init__.py
│ └── test_pls.py
├── datasets/
│ ├── __init__.py
│ ├── _base.py
│ ├── _california_housing.py
│ ├── _covtype.py
│ ├── _kddcup99.py
│ ├── _lfw.py
│ ├── _olivetti_faces.py
│ ├── _openml.py
│ ├── _rcv1.py
│ ├── _samples_generator.py
│ ├── _species_distributions.py
│ ├── _svmlight_format_fast.pyx
│ ├── _svmlight_format_io.py
│ ├── _twenty_newsgroups.py
│ ├── data/
│ │ ├── __init__.py
│ │ ├── boston_house_prices.csv
│ │ ├── breast_cancer.csv
│ │ ├── iris.csv
│ │ ├── linnerud_exercise.csv
│ │ ├── linnerud_physiological.csv
│ │ └── wine_data.csv
│ ├── descr/
│ │ ├── __init__.py
│ │ ├── boston_house_prices.rst
│ │ ├── breast_cancer.rst
│ │ ├── california_housing.rst
│ │ ├── covtype.rst
│ │ ├── diabetes.rst
│ │ ├── digits.rst
│ │ ├── iris.rst
│ │ ├── kddcup99.rst
│ │ ├── lfw.rst
│ │ ├── linnerud.rst
│ │ ├── olivetti_faces.rst
│ │ ├── rcv1.rst
│ │ ├── twenty_newsgroups.rst
│ │ └── wine_data.rst
│ ├── images/
│ │ ├── README.txt
│ │ └── __init__.py
│ ├── setup.py
│ └── tests/
│ ├── __init__.py
│ ├── conftest.py
│ ├── data/
│ │ ├── __init__.py
│ │ ├── openml/
│ │ │ ├── __init__.py
│ │ │ ├── id_1/
│ │ │ │ └── __init__.py
│ │ │ ├── id_1119/
│ │ │ │ └── __init__.py
│ │ │ ├── id_2/
│ │ │ │ └── __init__.py
│ │ │ ├── id_292/
│ │ │ │ └── __init__.py
│ │ │ ├── id_3/
│ │ │ │ └── __init__.py
│ │ │ ├── id_40589/
│ │ │ │ └── __init__.py
│ │ │ ├── id_40675/
│ │ │ │ └── __init__.py
│ │ │ ├── id_40945/
│ │ │ │ └── __init__.py
│ │ │ ├── id_40966/
│ │ │ │ └── __init__.py
│ │ │ ├── id_42585/
│ │ │ │ └── __init__.py
│ │ │ ├── id_561/
│ │ │ │ └── __init__.py
│ │ │ ├── id_61/
│ │ │ │ └── __init__.py
│ │ │ └── id_62/
│ │ │ └── __init__.py
│ │ ├── svmlight_classification.txt
│ │ ├── svmlight_invalid.txt
│ │ ├── svmlight_invalid_order.txt
│ │ └── svmlight_multilabel.txt
│ ├── test_20news.py
│ ├── test_base.py
│ ├── test_california_housing.py
│ ├── test_common.py
│ ├── test_covtype.py
│ ├── test_kddcup99.py
│ ├── test_lfw.py
│ ├── test_olivetti_faces.py
│ ├── test_openml.py
│ ├── test_rcv1.py
│ ├── test_samples_generator.py
│ └── test_svmlight_format.py
├── decomposition/
│ ├── __init__.py
│ ├── _base.py
│ ├── _cdnmf_fast.pyx
│ ├── _dict_learning.py
│ ├── _factor_analysis.py
│ ├── _fastica.py
│ ├── _incremental_pca.py
│ ├── _kernel_pca.py
│ ├── _lda.py
│ ├── _nmf.py
│ ├── _online_lda_fast.pyx
│ ├── _pca.py
│ ├── _sparse_pca.py
│ ├── _truncated_svd.py
│ ├── setup.py
│ └── tests/
│ ├── __init__.py
│ ├── test_dict_learning.py
│ ├── test_factor_analysis.py
│ ├── test_fastica.py
│ ├── test_incremental_pca.py
│ ├── test_kernel_pca.py
│ ├── test_nmf.py
│ ├── test_online_lda.py
│ ├── test_pca.py
│ ├── test_sparse_pca.py
│ └── test_truncated_svd.py
├── discriminant_analysis.py
├── dummy.py
├── ensemble/
│ ├── __init__.py
│ ├── _bagging.py
│ ├── _base.py
│ ├── _forest.py
│ ├── _gb.py
│ ├── _gb_losses.py
│ ├── _gradient_boosting.pyx
│ ├── _hist_gradient_boosting/
│ │ ├── __init__.py
│ │ ├── _binning.pyx
│ │ ├── _bitset.pxd
│ │ ├── _bitset.pyx
│ │ ├── _gradient_boosting.pyx
│ │ ├── _loss.pyx
│ │ ├── _predictor.pyx
│ │ ├── binning.py
│ │ ├── common.pxd
│ │ ├── common.pyx
│ │ ├── gradient_boosting.py
│ │ ├── grower.py
│ │ ├── histogram.pyx
│ │ ├── loss.py
│ │ ├── predictor.py
│ │ ├── splitting.pyx
│ │ ├── tests/
│ │ │ ├── __init__.py
│ │ │ ├── test_binning.py
│ │ │ ├── test_bitset.py
│ │ │ ├── test_compare_lightgbm.py
│ │ │ ├── test_gradient_boosting.py
│ │ │ ├── test_grower.py
│ │ │ ├── test_histogram.py
│ │ │ ├── test_loss.py
│ │ │ ├── test_monotonic_contraints.py
│ │ │ ├── test_predictor.py
│ │ │ ├── test_splitting.py
│ │ │ └── test_warm_start.py
│ │ └── utils.pyx
│ ├── _iforest.py
│ ├── _stacking.py
│ ├── _voting.py
│ ├── _weight_boosting.py
│ ├── setup.py
│ └── tests/
│ ├── __init__.py
│ ├── test_bagging.py
│ ├── test_base.py
│ ├── test_common.py
│ ├── test_forest.py
│ ├── test_gradient_boosting.py
│ ├── test_gradient_boosting_loss_functions.py
│ ├── test_iforest.py
│ ├── test_stacking.py
│ ├── test_voting.py
│ └── test_weight_boosting.py
├── exceptions.py
├── experimental/
│ ├── __init__.py
│ ├── enable_halving_search_cv.py
│ ├── enable_hist_gradient_boosting.py
│ ├── enable_iterative_imputer.py
│ └── tests/
│ ├── __init__.py
│ ├── test_enable_hist_gradient_boosting.py
│ ├── test_enable_iterative_imputer.py
│ └── test_enable_successive_halving.py
├── externals/
│ ├── README
│ ├── __init__.py
│ ├── _arff.py
│ ├── _lobpcg.py
│ ├── _packaging/
│ │ ├── __init__.py
│ │ ├── _structures.py
│ │ └── version.py
│ ├── _pilutil.py
│ └── conftest.py
├── feature_extraction/
│ ├── __init__.py
│ ├── _dict_vectorizer.py
│ ├── _hash.py
│ ├── _hashing_fast.pyx
│ ├── _stop_words.py
│ ├── image.py
│ ├── setup.py
│ ├── tests/
│ │ ├── __init__.py
│ │ ├── test_dict_vectorizer.py
│ │ ├── test_feature_hasher.py
│ │ ├── test_image.py
│ │ └── test_text.py
│ └── text.py
├── feature_selection/
│ ├── __init__.py
│ ├── _base.py
│ ├── _from_model.py
│ ├── _mutual_info.py
│ ├── _rfe.py
│ ├── _sequential.py
│ ├── _univariate_selection.py
│ ├── _variance_threshold.py
│ └── tests/
│ ├── __init__.py
│ ├── test_base.py
│ ├── test_chi2.py
│ ├── test_feature_select.py
│ ├── test_from_model.py
│ ├── test_mutual_info.py
│ ├── test_rfe.py
│ ├── test_sequential.py
│ └── test_variance_threshold.py
├── gaussian_process/
│ ├── __init__.py
│ ├── _gpc.py
│ ├── _gpr.py
│ ├── kernels.py
│ └── tests/
│ ├── __init__.py
│ ├── _mini_sequence_kernel.py
│ ├── test_gpc.py
│ ├── test_gpr.py
│ └── test_kernels.py
├── impute/
│ ├── __init__.py
│ ├── _base.py
│ ├── _iterative.py
│ ├── _knn.py
│ └── tests/
│ ├── __init__.py
│ ├── test_base.py
│ ├── test_common.py
│ ├── test_impute.py
│ └── test_knn.py
├── inspection/
│ ├── __init__.py
│ ├── _partial_dependence.py
│ ├── _permutation_importance.py
│ ├── _plot/
│ │ ├── __init__.py
│ │ ├── partial_dependence.py
│ │ └── tests/
│ │ ├── __init__.py
│ │ └── test_plot_partial_dependence.py
│ ├── setup.py
│ └── tests/
│ ├── __init__.py
│ ├── test_partial_dependence.py
│ └── test_permutation_importance.py
├── isotonic.py
├── kernel_approximation.py
├── kernel_ridge.py
├── linear_model/
│ ├── __init__.py
│ ├── _base.py
│ ├── _bayes.py
│ ├── _cd_fast.pyx
│ ├── _coordinate_descent.py
│ ├── _glm/
│ │ ├── __init__.py
│ │ ├── glm.py
│ │ ├── link.py
│ │ └── tests/
│ │ ├── __init__.py
│ │ ├── test_glm.py
│ │ └── test_link.py
│ ├── _huber.py
│ ├── _least_angle.py
│ ├── _logistic.py
│ ├── _omp.py
│ ├── _passive_aggressive.py
│ ├── _perceptron.py
│ ├── _quantile.py
│ ├── _ransac.py
│ ├── _ridge.py
│ ├── _sag.py
│ ├── _sag_fast.pyx.tp
│ ├── _sgd_fast.pxd
│ ├── _sgd_fast.pyx
│ ├── _sgd_fast_helpers.h
│ ├── _stochastic_gradient.py
│ ├── _theil_sen.py
│ ├── setup.py
│ └── tests/
│ ├── __init__.py
│ ├── test_base.py
│ ├── test_bayes.py
│ ├── test_common.py
│ ├── test_coordinate_descent.py
│ ├── test_huber.py
│ ├── test_least_angle.py
│ ├── test_logistic.py
│ ├── test_omp.py
│ ├── test_passive_aggressive.py
│ ├── test_perceptron.py
│ ├── test_quantile.py
│ ├── test_ransac.py
│ ├── test_ridge.py
│ ├── test_sag.py
│ ├── test_sgd.py
│ ├── test_sparse_coordinate_descent.py
│ └── test_theil_sen.py
├── manifold/
│ ├── __init__.py
│ ├── _barnes_hut_tsne.pyx
│ ├── _isomap.py
│ ├── _locally_linear.py
│ ├── _mds.py
│ ├── _spectral_embedding.py
│ ├── _t_sne.py
│ ├── _utils.pyx
│ ├── setup.py
│ └── tests/
│ ├── __init__.py
│ ├── test_isomap.py
│ ├── test_locally_linear.py
│ ├── test_mds.py
│ ├── test_spectral_embedding.py
│ └── test_t_sne.py
├── metrics/
│ ├── __init__.py
│ ├── _base.py
│ ├── _classification.py
│ ├── _dist_metrics.pxd
│ ├── _dist_metrics.pyx
│ ├── _pairwise_fast.pyx
│ ├── _plot/
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── confusion_matrix.py
│ │ ├── det_curve.py
│ │ ├── precision_recall_curve.py
│ │ ├── roc_curve.py
│ │ └── tests/
│ │ ├── __init__.py
│ │ ├── test_base.py
│ │ ├── test_common_curve_display.py
│ │ ├── test_confusion_matrix_display.py
│ │ ├── test_det_curve_display.py
│ │ ├── test_plot_confusion_matrix.py
│ │ ├── test_plot_curve_common.py
│ │ ├── test_plot_det_curve.py
│ │ ├── test_plot_precision_recall.py
│ │ ├── test_plot_roc_curve.py
│ │ ├── test_precision_recall_display.py
│ │ └── test_roc_curve_display.py
│ ├── _ranking.py
│ ├── _regression.py
│ ├── _scorer.py
│ ├── cluster/
│ │ ├── __init__.py
│ │ ├── _bicluster.py
│ │ ├── _expected_mutual_info_fast.pyx
│ │ ├── _supervised.py
│ │ ├── _unsupervised.py
│ │ ├── setup.py
│ │ └── tests/
│ │ ├── __init__.py
│ │ ├── test_bicluster.py
│ │ ├── test_common.py
│ │ ├── test_supervised.py
│ │ └── test_unsupervised.py
│ ├── pairwise.py
│ ├── setup.py
│ └── tests/
│ ├── __init__.py
│ ├── test_classification.py
│ ├── test_common.py
│ ├── test_dist_metrics.py
│ ├── test_pairwise.py
│ ├── test_ranking.py
│ ├── test_regression.py
│ └── test_score_objects.py
├── mixture/
│ ├── __init__.py
│ ├── _base.py
│ ├── _bayesian_mixture.py
│ ├── _gaussian_mixture.py
│ └── tests/
│ ├── __init__.py
│ ├── test_bayesian_mixture.py
│ ├── test_gaussian_mixture.py
│ └── test_mixture.py
├── model_selection/
│ ├── __init__.py
│ ├── _search.py
│ ├── _search_successive_halving.py
│ ├── _split.py
│ ├── _validation.py
│ └── tests/
│ ├── __init__.py
│ ├── common.py
│ ├── test_search.py
│ ├── test_split.py
│ ├── test_successive_halving.py
│ └── test_validation.py
├── multiclass.py
├── multioutput.py
├── naive_bayes.py
├── neighbors/
│ ├── __init__.py
│ ├── _ball_tree.pyx
│ ├── _base.py
│ ├── _binary_tree.pxi
│ ├── _classification.py
│ ├── _distance_metric.py
│ ├── _graph.py
│ ├── _kd_tree.pyx
│ ├── _kde.py
│ ├── _lof.py
│ ├── _nca.py
│ ├── _nearest_centroid.py
│ ├── _partition_nodes.pxd
│ ├── _partition_nodes.pyx
│ ├── _quad_tree.pxd
│ ├── _quad_tree.pyx
│ ├── _regression.py
│ ├── _unsupervised.py
│ ├── setup.py
│ └── tests/
│ ├── __init__.py
│ ├── test_ball_tree.py
│ ├── test_graph.py
│ ├── test_kd_tree.py
│ ├── test_kde.py
│ ├── test_lof.py
│ ├── test_nca.py
│ ├── test_nearest_centroid.py
│ ├── test_neighbors.py
│ ├── test_neighbors_pipeline.py
│ ├── test_neighbors_tree.py
│ └── test_quad_tree.py
├── neural_network/
│ ├── __init__.py
│ ├── _base.py
│ ├── _multilayer_perceptron.py
│ ├── _rbm.py
│ ├── _stochastic_optimizers.py
│ └── tests/
│ ├── __init__.py
│ ├── test_base.py
│ ├── test_mlp.py
│ ├── test_rbm.py
│ └── test_stochastic_optimizers.py
├── pipeline.py
├── preprocessing/
│ ├── __init__.py
│ ├── _csr_polynomial_expansion.pyx
│ ├── _data.py
│ ├── _discretization.py
│ ├── _encoders.py
│ ├── _function_transformer.py
│ ├── _label.py
│ ├── _polynomial.py
│ ├── setup.py
│ └── tests/
│ ├── __init__.py
│ ├── test_common.py
│ ├── test_data.py
│ ├── test_discretization.py
│ ├── test_encoders.py
│ ├── test_function_transformer.py
│ ├── test_label.py
│ └── test_polynomial.py
├── random_projection.py
├── semi_supervised/
│ ├── __init__.py
│ ├── _label_propagation.py
│ ├── _self_training.py
│ └── tests/
│ ├── __init__.py
│ ├── test_label_propagation.py
│ └── test_self_training.py
├── setup.py
├── svm/
│ ├── __init__.py
│ ├── _base.py
│ ├── _bounds.py
│ ├── _classes.py
│ ├── _liblinear.pxi
│ ├── _liblinear.pyx
│ ├── _libsvm.pxi
│ ├── _libsvm.pyx
│ ├── _libsvm_sparse.pyx
│ ├── _newrand.pyx
│ ├── setup.py
│ ├── src/
│ │ ├── liblinear/
│ │ │ ├── COPYRIGHT
│ │ │ ├── _cython_blas_helpers.h
│ │ │ ├── liblinear_helper.c
│ │ │ ├── linear.cpp
│ │ │ ├── linear.h
│ │ │ ├── tron.cpp
│ │ │ └── tron.h
│ │ ├── libsvm/
│ │ │ ├── LIBSVM_CHANGES
│ │ │ ├── _svm_cython_blas_helpers.h
│ │ │ ├── libsvm_helper.c
│ │ │ ├── libsvm_sparse_helper.c
│ │ │ ├── libsvm_template.cpp
│ │ │ ├── svm.cpp
│ │ │ └── svm.h
│ │ └── newrand/
│ │ └── newrand.h
│ └── tests/
│ ├── __init__.py
│ ├── test_bounds.py
│ ├── test_sparse.py
│ └── test_svm.py
├── tests/
│ ├── __init__.py
│ ├── test_base.py
│ ├── test_build.py
│ ├── test_calibration.py
│ ├── test_check_build.py
│ ├── test_common.py
│ ├── test_config.py
│ ├── test_discriminant_analysis.py
│ ├── test_docstring_parameters.py
│ ├── test_dummy.py
│ ├── test_init.py
│ ├── test_isotonic.py
│ ├── test_kernel_approximation.py
│ ├── test_kernel_ridge.py
│ ├── test_metaestimators.py
│ ├── test_min_dependencies_readme.py
│ ├── test_multiclass.py
│ ├── test_multioutput.py
│ ├── test_naive_bayes.py
│ ├── test_pipeline.py
│ └── test_random_projection.py
├── tree/
│ ├── __init__.py
│ ├── _classes.py
│ ├── _criterion.pxd
│ ├── _criterion.pyx
│ ├── _export.py
│ ├── _reingold_tilford.py
│ ├── _splitter.pxd
│ ├── _splitter.pyx
│ ├── _tree.pxd
│ ├── _tree.pyx
│ ├── _utils.pxd
│ ├── _utils.pyx
│ ├── setup.py
│ └── tests/
│ ├── __init__.py
│ ├── test_export.py
│ ├── test_reingold_tilford.py
│ └── test_tree.py
└── utils/
├── __init__.py
├── _arpack.py
├── _cython_blas.pxd
├── _cython_blas.pyx
├── _encode.py
├── _estimator_html_repr.py
├── _fast_dict.pxd
├── _fast_dict.pyx
├── _joblib.py
├── _logistic_sigmoid.pyx
├── _mask.py
├── _mocking.py
├── _openmp_helpers.pyx
├── _pprint.py
├── _random.pxd
├── _random.pyx
├── _readonly_array_wrapper.pyx
├── _seq_dataset.pxd.tp
├── _seq_dataset.pyx.tp
├── _show_versions.py
├── _tags.py
├── _testing.py
├── _typedefs.pxd
├── _typedefs.pyx
├── _weight_vector.pxd.tp
├── _weight_vector.pyx.tp
├── arrayfuncs.pyx
├── class_weight.py
├── deprecation.py
├── estimator_checks.py
├── extmath.py
├── fixes.py
├── graph.py
├── metaestimators.py
├── multiclass.py
├── murmurhash.pxd
├── murmurhash.pyx
├── optimize.py
├── random.py
├── setup.py
├── sparsefuncs.py
├── sparsefuncs_fast.pyx
├── src/
│ ├── MurmurHash3.cpp
│ └── MurmurHash3.h
├── stats.py
├── tests/
│ ├── __init__.py
│ ├── conftest.py
│ ├── test_arpack.py
│ ├── test_arrayfuncs.py
│ ├── test_class_weight.py
│ ├── test_cython_blas.py
│ ├── test_cython_templating.py
│ ├── test_deprecation.py
│ ├── test_encode.py
│ ├── test_estimator_checks.py
│ ├── test_estimator_html_repr.py
│ ├── test_extmath.py
│ ├── test_fast_dict.py
│ ├── test_fixes.py
│ ├── test_graph.py
│ ├── test_metaestimators.py
│ ├── test_mocking.py
│ ├── test_multiclass.py
│ ├── test_murmurhash.py
│ ├── test_optimize.py
│ ├── test_parallel.py
│ ├── test_pprint.py
│ ├── test_random.py
│ ├── test_readonly_wrapper.py
│ ├── test_seq_dataset.py
│ ├── test_shortest_path.py
│ ├── test_show_versions.py
│ ├── test_sparsefuncs.py
│ ├── test_stats.py
│ ├── test_tags.py
│ ├── test_testing.py
│ ├── test_utils.py
│ ├── test_validation.py
│ └── test_weight_vector.py
└── validation.py
Showing preview only (839K chars total). Download the full file or copy to clipboard to get everything.
SYMBOL INDEX (9625 symbols across 598 files)
FILE: asv_benchmarks/benchmarks/cluster.py
class KMeansBenchmark (line 8) | class KMeansBenchmark(Predictor, Transformer, Estimator, Benchmark):
method setup_cache (line 16) | def setup_cache(self):
method make_data (line 19) | def make_data(self, params):
method make_estimator (line 29) | def make_estimator(self, params):
method make_scorers (line 46) | def make_scorers(self):
class MiniBatchKMeansBenchmark (line 57) | class MiniBatchKMeansBenchmark(Predictor, Transformer, Estimator, Benchm...
method setup_cache (line 65) | def setup_cache(self):
method make_data (line 68) | def make_data(self, params):
method make_estimator (line 78) | def make_estimator(self, params):
method make_scorers (line 96) | def make_scorers(self):
FILE: asv_benchmarks/benchmarks/common.py
function get_from_config (line 13) | def get_from_config():
function get_estimator_path (line 59) | def get_estimator_path(benchmark, directory, params, save=False):
function clear_tmp (line 74) | def clear_tmp():
class Benchmark (line 81) | class Benchmark(ABC):
method params (line 116) | def params(self):
class Estimator (line 120) | class Estimator(ABC):
method make_data (line 124) | def make_data(self, params):
method make_estimator (line 131) | def make_estimator(self, params):
method skip (line 135) | def skip(self, params):
method setup_cache (line 139) | def setup_cache(self):
method setup (line 162) | def setup(self, *params):
method time_fit (line 180) | def time_fit(self, *args):
method peakmem_fit (line 183) | def peakmem_fit(self, *args):
method track_train_score (line 186) | def track_train_score(self, *args):
method track_test_score (line 193) | def track_test_score(self, *args):
class Predictor (line 201) | class Predictor(ABC):
method time_predict (line 206) | def time_predict(self, *args):
method peakmem_predict (line 209) | def peakmem_predict(self, *args):
method track_same_prediction (line 214) | def track_same_prediction(self, *args):
method params (line 226) | def params(self):
class Transformer (line 230) | class Transformer(ABC):
method time_transform (line 235) | def time_transform(self, *args):
method peakmem_transform (line 238) | def peakmem_transform(self, *args):
method track_same_transform (line 243) | def track_same_transform(self, *args):
method params (line 255) | def params(self):
FILE: asv_benchmarks/benchmarks/datasets.py
function _blobs_dataset (line 25) | def _blobs_dataset(n_samples=500000, n_features=3, n_clusters=100, dtype...
function _20newsgroups_highdim_dataset (line 36) | def _20newsgroups_highdim_dataset(n_samples=None, ngrams=(1, 1), dtype=n...
function _20newsgroups_lowdim_dataset (line 47) | def _20newsgroups_lowdim_dataset(n_components=100, ngrams=(1, 1), dtype=...
function _mnist_dataset (line 61) | def _mnist_dataset(dtype=np.float32):
function _digits_dataset (line 71) | def _digits_dataset(n_samples=None, dtype=np.float32):
function _synth_regression_dataset (line 83) | def _synth_regression_dataset(n_samples=100000, n_features=100, dtype=np...
function _synth_regression_sparse_dataset (line 99) | def _synth_regression_sparse_dataset(
function _synth_classification_dataset (line 117) | def _synth_classification_dataset(
function _olivetti_faces_dataset (line 136) | def _olivetti_faces_dataset():
function _random_dataset (line 150) | def _random_dataset(
FILE: asv_benchmarks/benchmarks/decomposition.py
class PCABenchmark (line 8) | class PCABenchmark(Transformer, Estimator, Benchmark):
method setup_cache (line 16) | def setup_cache(self):
method make_data (line 19) | def make_data(self, params):
method make_estimator (line 22) | def make_estimator(self, params):
method make_scorers (line 29) | def make_scorers(self):
class DictionaryLearningBenchmark (line 33) | class DictionaryLearningBenchmark(Transformer, Estimator, Benchmark):
method setup_cache (line 41) | def setup_cache(self):
method make_data (line 44) | def make_data(self, params):
method make_estimator (line 47) | def make_estimator(self, params):
method make_scorers (line 62) | def make_scorers(self):
class MiniBatchDictionaryLearningBenchmark (line 66) | class MiniBatchDictionaryLearningBenchmark(Transformer, Estimator, Bench...
method setup_cache (line 74) | def setup_cache(self):
method make_data (line 77) | def make_data(self, params):
method make_estimator (line 80) | def make_estimator(self, params):
method make_scorers (line 94) | def make_scorers(self):
FILE: asv_benchmarks/benchmarks/ensemble.py
class RandomForestClassifierBenchmark (line 16) | class RandomForestClassifierBenchmark(Predictor, Estimator, Benchmark):
method setup_cache (line 24) | def setup_cache(self):
method make_data (line 27) | def make_data(self, params):
method make_estimator (line 37) | def make_estimator(self, params):
method make_scorers (line 52) | def make_scorers(self):
class GradientBoostingClassifierBenchmark (line 56) | class GradientBoostingClassifierBenchmark(Predictor, Estimator, Benchmark):
method setup_cache (line 64) | def setup_cache(self):
method make_data (line 67) | def make_data(self, params):
method make_estimator (line 77) | def make_estimator(self, params):
method make_scorers (line 91) | def make_scorers(self):
class HistGradientBoostingClassifierBenchmark (line 95) | class HistGradientBoostingClassifierBenchmark(Predictor, Estimator, Benc...
method setup_cache (line 103) | def setup_cache(self):
method make_data (line 106) | def make_data(self, params):
method make_estimator (line 113) | def make_estimator(self, params):
method make_scorers (line 120) | def make_scorers(self):
FILE: asv_benchmarks/benchmarks/linear_model.py
class LogisticRegressionBenchmark (line 20) | class LogisticRegressionBenchmark(Predictor, Estimator, Benchmark):
method setup_cache (line 28) | def setup_cache(self):
method make_data (line 31) | def make_data(self, params):
method make_estimator (line 47) | def make_estimator(self, params):
method make_scorers (line 63) | def make_scorers(self):
class RidgeBenchmark (line 67) | class RidgeBenchmark(Predictor, Estimator, Benchmark):
method setup_cache (line 78) | def setup_cache(self):
method make_data (line 81) | def make_data(self, params):
method make_estimator (line 93) | def make_estimator(self, params):
method make_scorers (line 100) | def make_scorers(self):
method skip (line 103) | def skip(self, params):
class LinearRegressionBenchmark (line 111) | class LinearRegressionBenchmark(Predictor, Estimator, Benchmark):
method setup_cache (line 119) | def setup_cache(self):
method make_data (line 122) | def make_data(self, params):
method make_estimator (line 134) | def make_estimator(self, params):
method make_scorers (line 139) | def make_scorers(self):
class SGDRegressorBenchmark (line 143) | class SGDRegressorBenchmark(Predictor, Estimator, Benchmark):
method setup_cache (line 151) | def setup_cache(self):
method make_data (line 154) | def make_data(self, params):
method make_estimator (line 166) | def make_estimator(self, params):
method make_scorers (line 171) | def make_scorers(self):
class ElasticNetBenchmark (line 175) | class ElasticNetBenchmark(Predictor, Estimator, Benchmark):
method setup_cache (line 183) | def setup_cache(self):
method make_data (line 186) | def make_data(self, params):
method make_estimator (line 198) | def make_estimator(self, params):
method make_scorers (line 205) | def make_scorers(self):
method skip (line 208) | def skip(self, params):
class LassoBenchmark (line 216) | class LassoBenchmark(Predictor, Estimator, Benchmark):
method setup_cache (line 224) | def setup_cache(self):
method make_data (line 227) | def make_data(self, params):
method make_estimator (line 239) | def make_estimator(self, params):
method make_scorers (line 246) | def make_scorers(self):
method skip (line 249) | def skip(self, params):
FILE: asv_benchmarks/benchmarks/manifold.py
class TSNEBenchmark (line 7) | class TSNEBenchmark(Estimator, Benchmark):
method setup_cache (line 15) | def setup_cache(self):
method make_data (line 18) | def make_data(self, params):
method make_estimator (line 25) | def make_estimator(self, params):
method make_scorers (line 32) | def make_scorers(self):
FILE: asv_benchmarks/benchmarks/metrics.py
class PairwiseDistancesBenchmark (line 7) | class PairwiseDistancesBenchmark(Benchmark):
method setup (line 19) | def setup(self, *params):
method time_pairwise_distances (line 41) | def time_pairwise_distances(self, *args):
method peakmem_pairwise_distances (line 44) | def peakmem_pairwise_distances(self, *args):
FILE: asv_benchmarks/benchmarks/model_selection.py
class CrossValidationBenchmark (line 9) | class CrossValidationBenchmark(Benchmark):
method setup (line 19) | def setup(self, *params):
method time_crossval (line 31) | def time_crossval(self, *args):
method peakmem_crossval (line 34) | def peakmem_crossval(self, *args):
method track_crossval (line 37) | def track_crossval(self, *args):
class GridSearchBenchmark (line 41) | class GridSearchBenchmark(Predictor, Estimator, Benchmark):
method setup_cache (line 51) | def setup_cache(self):
method make_data (line 54) | def make_data(self, params):
method make_estimator (line 59) | def make_estimator(self, params):
method make_scorers (line 83) | def make_scorers(self):
FILE: asv_benchmarks/benchmarks/neighbors.py
class KNeighborsClassifierBenchmark (line 8) | class KNeighborsClassifierBenchmark(Predictor, Estimator, Benchmark):
method setup_cache (line 16) | def setup_cache(self):
method make_data (line 19) | def make_data(self, params):
method make_estimator (line 31) | def make_estimator(self, params):
method make_scorers (line 38) | def make_scorers(self):
FILE: asv_benchmarks/benchmarks/svm.py
class SVCBenchmark (line 8) | class SVCBenchmark(Predictor, Estimator, Benchmark):
method setup_cache (line 14) | def setup_cache(self):
method make_data (line 17) | def make_data(self, params):
method make_estimator (line 20) | def make_estimator(self, params):
method make_scorers (line 29) | def make_scorers(self):
FILE: asv_benchmarks/benchmarks/utils.py
function neg_mean_inertia (line 6) | def neg_mean_inertia(X, labels, centers):
function make_gen_classif_scorers (line 10) | def make_gen_classif_scorers(caller):
function make_gen_reg_scorers (line 15) | def make_gen_reg_scorers(caller):
function neg_mean_data_error (line 20) | def neg_mean_data_error(X, U, V):
function make_dict_learning_scorers (line 24) | def make_dict_learning_scorers(caller):
function explained_variance_ratio (line 39) | def explained_variance_ratio(Xt, X):
function make_pca_scorers (line 43) | def make_pca_scorers(caller):
FILE: benchmarks/bench_covertype.py
function load_data (line 72) | def load_data(dtype=np.float32, order="C", random_state=13):
FILE: benchmarks/bench_glmnet.py
function rmse (line 28) | def rmse(a, b):
function bench (line 32) | def bench(factory, X, Y, X_test, Y_test, ref_coef):
FILE: benchmarks/bench_hist_gradient_boosting.py
function get_estimator_and_data (line 53) | def get_estimator_and_data():
function one_run (line 92) | def one_run(n_samples):
FILE: benchmarks/bench_hist_gradient_boosting_adult.py
function fit (line 28) | def fit(est, data_train, target_train, libname, **fit_params):
function predict (line 36) | def predict(est, data_test, target_test):
FILE: benchmarks/bench_hist_gradient_boosting_categorical_only.py
function fit (line 33) | def fit(est, data_train, target_train, libname, **fit_params):
function predict (line 41) | def predict(est, data_test):
FILE: benchmarks/bench_hist_gradient_boosting_higgsboson.py
function load_data (line 41) | def load_data():
function fit (line 57) | def fit(est, data_train, target_train, libname):
function predict (line 65) | def predict(est, data_test, target_test):
FILE: benchmarks/bench_hist_gradient_boosting_threading.py
function get_estimator_and_data (line 69) | def get_estimator_and_data():
function one_run (line 144) | def one_run(n_threads, n_samples):
FILE: benchmarks/bench_isolation_forest.py
function print_outlier_ratio (line 32) | def print_outlier_ratio(y):
FILE: benchmarks/bench_isotonic.py
function generate_perturbed_logarithm_dataset (line 22) | def generate_perturbed_logarithm_dataset(size):
function generate_logistic_dataset (line 26) | def generate_logistic_dataset(size):
function generate_pathological_dataset (line 31) | def generate_pathological_dataset(size):
function bench_isotonic_regression (line 45) | def bench_isotonic_regression(Y):
FILE: benchmarks/bench_lasso.py
function compute_bench (line 21) | def compute_bench(alpha, n_samples, n_features, precompute):
FILE: benchmarks/bench_mnist.py
function load_data (line 60) | def load_data(dtype=np.float32, order="F"):
FILE: benchmarks/bench_multilabel_metrics.py
function benchmark (line 43) | def benchmark(
function _tabulate (line 106) | def _tabulate(results, metrics, formats):
function _plot (line 120) | def _plot(
FILE: benchmarks/bench_online_ocsvm.py
function print_outlier_ratio (line 41) | def print_outlier_ratio(y):
function autolabel_auc (line 235) | def autolabel_auc(rects, ax):
function autolabel_time (line 248) | def autolabel_time(rects, ax):
FILE: benchmarks/bench_plot_fastkmeans.py
function compute_bench (line 10) | def compute_bench(samples_range, features_range):
function compute_bench_2 (line 57) | def compute_bench_2(chunks):
FILE: benchmarks/bench_plot_hierarchical.py
function compute_bench (line 10) | def compute_bench(samples_range, features_range):
FILE: benchmarks/bench_plot_incremental_pca.py
function plot_results (line 19) | def plot_results(X, y, label):
function benchmark (line 23) | def benchmark(estimator, data):
function plot_feature_times (line 35) | def plot_feature_times(all_times, batch_size, all_components, data):
function plot_feature_errors (line 50) | def plot_feature_errors(all_errors, batch_size, all_components, data):
function plot_batch_times (line 64) | def plot_batch_times(all_times, n_features, all_batch_sizes, data):
function plot_batch_errors (line 77) | def plot_batch_errors(all_errors, n_features, all_batch_sizes, data):
function fixed_batch_size_comparison (line 90) | def fixed_batch_size_comparison(data):
function variable_batch_size_comparison (line 113) | def variable_batch_size_comparison(data):
FILE: benchmarks/bench_plot_lasso_path.py
function compute_bench (line 17) | def compute_bench(samples_range, features_range):
FILE: benchmarks/bench_plot_neighbors.py
function get_data (line 13) | def get_data(N, D, dataset="dense"):
function barplot_neighbors (line 26) | def barplot_neighbors(
FILE: benchmarks/bench_plot_nmf.py
function _norm (line 42) | def _norm(x):
function _nls_subproblem (line 49) | def _nls_subproblem(
function _fit_projected_gradient (line 151) | def _fit_projected_gradient(X, W, H, tol, max_iter, nls_max_iter, alpha,...
class _PGNMF (line 195) | class _PGNMF(NMF):
method __init__ (line 203) | def __init__(
method fit (line 227) | def fit(self, X, y=None, **params):
method transform (line 231) | def transform(self, X):
method inverse_transform (line 237) | def inverse_transform(self, W):
method fit_transform (line 241) | def fit_transform(self, X, y=None, W=None, H=None):
method _fit_transform (line 246) | def _fit_transform(self, X, y=None, W=None, H=None, update_H=True):
function plot_results (line 323) | def plot_results(results_df, plot_name):
function bench_one (line 359) | def bench_one(
function run_bench (line 376) | def run_bench(X, clfs, plot_name, n_components, tol, alpha, l1_ratio):
function load_20news (line 414) | def load_20news():
function load_faces (line 427) | def load_faces():
function build_clfs (line 436) | def build_clfs(cd_iters, pg_iters, mu_iters):
FILE: benchmarks/bench_plot_omp_lars.py
function compute_bench (line 16) | def compute_bench(samples_range, features_range):
FILE: benchmarks/bench_plot_parallel_pairwise.py
function plot (line 12) | def plot(func):
function euclidean_distances (line 38) | def euclidean_distances(X, n_jobs):
function rbf_kernels (line 42) | def rbf_kernels(X, n_jobs):
FILE: benchmarks/bench_plot_randomized_svd.py
function unpickle (line 132) | def unpickle(file_name):
function handle_missing_dataset (line 137) | def handle_missing_dataset(file_folder):
function get_data (line 143) | def get_data(dataset_name):
function plot_time_vs_s (line 197) | def plot_time_vs_s(time, norm, point_labels, title):
function scatter_time_vs_s (line 221) | def scatter_time_vs_s(time, norm, point_labels, title):
function plot_power_iter_vs_s (line 260) | def plot_power_iter_vs_s(power_iter, s, title):
function svd_timing (line 270) | def svd_timing(
function norm_diff (line 302) | def norm_diff(A, norm=2, msg=True, random_state=None):
function scalable_frobenius_norm_discrepancy (line 324) | def scalable_frobenius_norm_discrepancy(X, U, s, V):
function bench_a (line 340) | def bench_a(X, dataset_name, power_iter, n_oversamples, n_comps):
function bench_b (line 396) | def bench_b(power_list):
function bench_c (line 443) | def bench_c(datasets, n_comps):
FILE: benchmarks/bench_plot_svd.py
function compute_bench (line 15) | def compute_bench(samples_range, features_range, n_iter=3, rank=50):
FILE: benchmarks/bench_random_projections.py
function type_auto_or_float (line 26) | def type_auto_or_float(val):
function type_auto_or_int (line 33) | def type_auto_or_int(val):
function compute_time (line 40) | def compute_time(t_start, delta):
function bench_scikit_transformer (line 46) | def bench_scikit_transformer(X, transformer):
function make_sparse_random_data (line 70) | def make_sparse_random_data(n_samples, n_features, n_nonzeros, random_st...
function print_row (line 85) | def print_row(clf_type, time_fit, time_transform):
FILE: benchmarks/bench_rcv1_logreg_convergence.py
function get_loss (line 25) | def get_loss(w, intercept, myX, myy, C):
function bench_one (line 38) | def bench_one(name, clf_type, clf_params, n_iter):
function bench (line 67) | def bench(clfs):
function plot_train_losses (line 103) | def plot_train_losses(clfs):
function plot_train_scores (line 112) | def plot_train_scores(clfs):
function plot_test_scores (line 122) | def plot_test_scores(clfs):
function plot_dloss (line 132) | def plot_dloss(clfs):
function get_max_squared_sum (line 150) | def get_max_squared_sum(X):
FILE: benchmarks/bench_saga.py
function fit_single (line 28) | def fit_single(
function _predict_proba (line 137) | def _predict_proba(lr, X):
function exp (line 144) | def exp(
function plot (line 238) | def plot(outname=None):
FILE: benchmarks/bench_sample_without_replacement.py
function compute_time (line 18) | def compute_time(t_start, delta):
function bench_sample (line 24) | def bench_sample(sampling, n_population, n_samples):
FILE: benchmarks/bench_sparsify.py
function sparsity_ratio (line 54) | def sparsity_ratio(X):
function benchmark_dense_predict (line 86) | def benchmark_dense_predict():
function benchmark_sparse_predict (line 91) | def benchmark_sparse_predict():
function score (line 97) | def score(y_test, y_pred, case):
FILE: benchmarks/bench_text_vectorizers.py
function run_vectorizer (line 28) | def run_vectorizer(Vectorizer, X, **params):
FILE: benchmarks/bench_tree.py
function bench_scikit_tree_classifier (line 28) | def bench_scikit_tree_classifier(X, Y):
function bench_scikit_tree_regressor (line 45) | def bench_scikit_tree_regressor(X, Y):
FILE: benchmarks/bench_tsne_mnist.py
function load_data (line 35) | def load_data(dtype=np.float32, order="C", shuffle=True, seed=0):
function nn_accuracy (line 51) | def nn_accuracy(X, X_embedded, k=1):
function tsne_fit_transform (line 59) | def tsne_fit_transform(model, data):
function sanitize (line 64) | def sanitize(filename):
function bhtsne (line 146) | def bhtsne(X):
FILE: build_tools/circle/list_versions.py
function json_urlread (line 12) | def json_urlread(url):
function human_readable_data_quantity (line 20) | def human_readable_data_quantity(quantity, multiple=1024):
function get_file_extension (line 35) | def get_file_extension(version):
function get_file_size (line 46) | def get_file_size(version):
FILE: build_tools/generate_authors_table.py
function get (line 25) | def get(url):
function get_contributors (line 41) | def get_contributors():
function get_profile (line 97) | def get_profile(login):
function key (line 122) | def key(profile):
function generate_table (line 128) | def generate_table(contributors):
function generate_list (line 149) | def generate_list(contributors):
FILE: build_tools/github/vendor.py
function make_distributor_init_32_bits (line 22) | def make_distributor_init_32_bits(
function make_distributor_init_64_bits (line 63) | def make_distributor_init_64_bits(
function main (line 112) | def main(wheel_dirname, bitness):
FILE: doc/conf.py
class SubSectionTitleOrder (line 326) | class SubSectionTitleOrder:
method __init__ (line 333) | def __init__(self, src_dir):
method __repr__ (line 337) | def __repr__(self):
method __call__ (line 340) | def __call__(self, directory):
function make_carousel_thumbs (line 396) | def make_carousel_thumbs(app, exception):
function filter_search_index (line 410) | def filter_search_index(app, exception):
function generate_min_dependency_table (line 431) | def generate_min_dependency_table(app):
function generate_min_dependency_substitutions (line 481) | def generate_min_dependency_substitutions(app):
function setup (line 504) | def setup(app):
FILE: doc/conftest.py
function setup_labeled_faces (line 16) | def setup_labeled_faces():
function setup_rcv1 (line 22) | def setup_rcv1():
function setup_twenty_newsgroups (line 30) | def setup_twenty_newsgroups():
function setup_working_with_text_data (line 36) | def setup_working_with_text_data():
function setup_loading_other_datasets (line 45) | def setup_loading_other_datasets():
function setup_compose (line 60) | def setup_compose():
function setup_impute (line 67) | def setup_impute():
function setup_grid_search (line 74) | def setup_grid_search():
function setup_preprocessing (line 81) | def setup_preprocessing():
function setup_unsupervised_learning (line 91) | def setup_unsupervised_learning():
function skip_if_matplotlib_not_installed (line 102) | def skip_if_matplotlib_not_installed(fname):
function pytest_runtest_setup (line 110) | def pytest_runtest_setup(item):
function pytest_configure (line 153) | def pytest_configure(config):
FILE: doc/sphinxext/add_toctree_functions.py
function add_toctree_functions (line 37) | def add_toctree_functions(app, pagename, templatename, context, doctree):
function docutils_node_to_jinja (line 94) | def docutils_node_to_jinja(list_item, only_pages=False, numbered=False):
function setup (line 157) | def setup(app):
FILE: doc/sphinxext/custom_references_resolver.py
class CustomReferencesResolver (line 42) | class CustomReferencesResolver(ReferencesResolver):
method resolve_anyref (line 43) | def resolve_anyref(self, refdoc, node, contnode):
method create_node (line 102) | def create_node(self, result):
function setup (line 117) | def setup(app):
FILE: doc/sphinxext/doi_role.py
function reference_role (line 23) | def reference_role(typ, rawtext, text, lineno, inliner, options={}, cont...
function setup_link_role (line 40) | def setup_link_role(app):
function setup (line 47) | def setup(app):
FILE: doc/sphinxext/github_link.py
function _get_git_revision (line 11) | def _get_git_revision():
function _linkcode_resolve (line 20) | def _linkcode_resolve(domain, info, package, url_fmt, revision):
function make_linkcode_resolve (line 70) | def make_linkcode_resolve(package, url_fmt):
FILE: doc/sphinxext/sphinx_issues.py
function user_role (line 32) | def user_role(name, rawtext, text, lineno, inliner, options=None, conten...
function cve_role (line 61) | def cve_role(name, rawtext, text, lineno, inliner, options=None, content...
class IssueRole (line 78) | class IssueRole(object):
method __init__ (line 82) | def __init__(
method default_format_text (line 91) | def default_format_text(issue_no):
method make_node (line 94) | def make_node(self, name, issue_no, config, options=None):
method __call__ (line 133) | def __call__(
function format_commit_text (line 177) | def format_commit_text(sha):
function setup (line 195) | def setup(app):
FILE: doc/themes/scikit-learn-modern/static/js/searchtools.js
function splitQuery (line 56) | function splitQuery(query) {
function pulse (line 128) | function pulse() {
function displayNextItem (line 272) | function displayNextItem() {
FILE: doc/tutorial/machine_learning_map/parse_path.py
function Command (line 18) | def Command(char):
function Arguments (line 22) | def Arguments(token):
class CaselessPreservingLiteral (line 26) | class CaselessPreservingLiteral(CaselessLiteral):
method __init__ (line 30) | def __init__( self, matchString ):
method parseImpl (line 36) | def parseImpl( self, instring, loc, doActions=True ):
function Sequence (line 46) | def Sequence(token):
function convertToFloat (line 54) | def convertToFloat(s, loc, toks):
function get_points (line 149) | def get_points(d):
FILE: doc/tutorial/machine_learning_map/pyparsing.py
function _ustr (line 132) | def _ustr(obj):
function _xml_escape (line 163) | def _xml_escape(data):
class _Constants (line 173) | class _Constants(object):
class ParseBaseException (line 183) | class ParseBaseException(Exception):
method __init__ (line 187) | def __init__( self, pstr, loc=0, msg=None, elem=None ):
method _from_exception (line 199) | def _from_exception(cls, pe):
method __getattr__ (line 206) | def __getattr__( self, aname ):
method __str__ (line 221) | def __str__( self ):
method __repr__ (line 224) | def __repr__( self ):
method markInputline (line 226) | def markInputline( self, markerString = ">!<" ):
method __dir__ (line 236) | def __dir__(self):
class ParseException (line 239) | class ParseException(ParseBaseException):
class ParseFatalException (line 260) | class ParseFatalException(ParseBaseException):
class ParseSyntaxException (line 265) | class ParseSyntaxException(ParseFatalException):
class RecursiveGrammarException (line 284) | class RecursiveGrammarException(Exception):
method __init__ (line 286) | def __init__( self, parseElementList ):
method __str__ (line 289) | def __str__( self ):
class _ParseResultsWithOffset (line 292) | class _ParseResultsWithOffset(object):
method __init__ (line 293) | def __init__(self,p1,p2):
method __getitem__ (line 295) | def __getitem__(self,i):
method __repr__ (line 297) | def __repr__(self):
method setOffset (line 299) | def setOffset(self,i):
class ParseResults (line 302) | class ParseResults(object):
method __new__ (line 341) | def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
method __init__ (line 350) | def __init__( self, toklist=None, name=None, asList=True, modal=True, ...
method __getitem__ (line 389) | def __getitem__( self, i ):
method __setitem__ (line 398) | def __setitem__( self, k, v, isinstance=isinstance ):
method __delitem__ (line 411) | def __delitem__( self, i ):
method __contains__ (line 432) | def __contains__( self, k ):
method __len__ (line 435) | def __len__( self ): return len( self.__toklist )
method __bool__ (line 436) | def __bool__(self): return ( not not self.__toklist )
method __iter__ (line 438) | def __iter__( self ): return iter( self.__toklist )
method __reversed__ (line 439) | def __reversed__( self ): return iter( self.__toklist[::-1] )
method _iterkeys (line 440) | def _iterkeys( self ):
method _itervalues (line 446) | def _itervalues( self ):
method _iteritems (line 449) | def _iteritems( self ):
method keys (line 472) | def keys( self ):
method values (line 476) | def values( self ):
method items (line 480) | def items( self ):
method haskeys (line 484) | def haskeys( self ):
method pop (line 489) | def pop( self, *args, **kwargs):
method get (line 541) | def get(self, key, defaultValue=None):
method insert (line 563) | def insert( self, index, insStr ):
method append (line 583) | def append( self, item ):
method extend (line 597) | def extend( self, itemseq ):
method clear (line 615) | def clear( self ):
method __getattr__ (line 622) | def __getattr__( self, name ):
method __add__ (line 636) | def __add__( self, other ):
method __iadd__ (line 641) | def __iadd__( self, other ):
method __radd__ (line 657) | def __radd__(self, other):
method __repr__ (line 665) | def __repr__( self ):
method __str__ (line 668) | def __str__( self ):
method _asStringList (line 671) | def _asStringList( self, sep='' ):
method asList (line 682) | def asList( self ):
method asDict (line 698) | def asDict( self ):
method copy (line 733) | def copy( self ):
method asXML (line 744) | def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatt...
method __lookup (line 805) | def __lookup(self,sub):
method getName (line 812) | def getName(self):
method dump (line 849) | def dump(self, indent='', depth=0, full=True):
method pprint (line 894) | def pprint(self, *args, **kwargs):
method __getstate__ (line 918) | def __getstate__(self):
method __setstate__ (line 925) | def __setstate__(self,state):
method __getnewargs__ (line 938) | def __getnewargs__(self):
method __dir__ (line 941) | def __dir__(self):
function col (line 946) | def col (loc,strg):
function lineno (line 959) | def lineno(loc,strg):
function line (line 971) | def line( loc, strg ):
function _defaultStartDebugAction (line 981) | def _defaultStartDebugAction( instring, loc, expr ):
function _defaultSuccessDebugAction (line 984) | def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
function _defaultExceptionDebugAction (line 987) | def _defaultExceptionDebugAction( instring, loc, expr, exc ):
function nullDebugAction (line 990) | def nullDebugAction(*args):
function _trim_arity (line 1017) | def _trim_arity(func, maxargs=2):
class ParserElement (line 1075) | class ParserElement(object):
method setDefaultWhitespaceChars (line 1081) | def setDefaultWhitespaceChars( chars ):
method inlineLiteralsUsing (line 1096) | def inlineLiteralsUsing(cls):
method __init__ (line 1116) | def __init__( self, savelist=False ):
method copy (line 1139) | def copy( self ):
method setName (line 1162) | def setName( self, name ):
method setResultsName (line 1176) | def setResultsName( self, name, listAllMatches=False ):
method setBreak (line 1204) | def setBreak(self,breakFlag = True):
method setParseAction (line 1222) | def setParseAction( self, *fns, **kwargs ):
method addParseAction (line 1260) | def addParseAction( self, *fns, **kwargs ):
method addCondition (line 1270) | def addCondition(self, *fns, **kwargs):
method setFailAction (line 1297) | def setFailAction( self, fn ):
method _skipIgnorables (line 1310) | def _skipIgnorables( self, instring, loc ):
method preParse (line 1323) | def preParse( self, instring, loc ):
method parseImpl (line 1335) | def parseImpl( self, instring, loc, doActions=True ):
method postParse (line 1338) | def postParse( self, instring, loc, tokenlist ):
method _parseNoCache (line 1342) | def _parseNoCache( self, instring, loc, doActions=True, callPreParse=T...
method tryParse (line 1414) | def tryParse( self, instring, loc ):
method canParseNext (line 1420) | def canParseNext(self, instring, loc):
class _UnboundedCache (line 1428) | class _UnboundedCache(object):
method __init__ (line 1429) | def __init__(self):
class _FifoCache (line 1451) | class _FifoCache(object):
method __init__ (line 1452) | def __init__(self, size):
method __init__ (line 1481) | def __init__(self, size):
class _FifoCache (line 1480) | class _FifoCache(object):
method __init__ (line 1452) | def __init__(self, size):
method __init__ (line 1481) | def __init__(self, size):
method _parseCache (line 1515) | def _parseCache( self, instring, loc, doActions=True, callPreParse=Tru...
method resetCache (line 1541) | def resetCache():
method enablePackrat (line 1547) | def enablePackrat(cache_size_limit=128):
method parseString (line 1581) | def parseString( self, instring, parseAll=False ):
method scanString (line 1631) | def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
method transformString (line 1702) | def transformString( self, instring ):
method searchString (line 1745) | def searchString( self, instring, maxMatches=_MAX_INT ):
method split (line 1772) | def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
method __add__ (line 1794) | def __add__(self, other ):
method __radd__ (line 1814) | def __radd__(self, other ):
method __sub__ (line 1826) | def __sub__(self, other):
method __rsub__ (line 1838) | def __rsub__(self, other ):
method __mul__ (line 1850) | def __mul__(self,other):
method __rmul__ (line 1918) | def __rmul__(self, other):
method __or__ (line 1921) | def __or__(self, other ):
method __ror__ (line 1933) | def __ror__(self, other ):
method __xor__ (line 1945) | def __xor__(self, other ):
method __rxor__ (line 1957) | def __rxor__(self, other ):
method __and__ (line 1969) | def __and__(self, other ):
method __rand__ (line 1981) | def __rand__(self, other ):
method __invert__ (line 1993) | def __invert__( self ):
method __call__ (line 1999) | def __call__(self, name=None):
method suppress (line 2018) | def suppress( self ):
method leaveWhitespace (line 2025) | def leaveWhitespace( self ):
method setWhitespaceChars (line 2034) | def setWhitespaceChars( self, chars ):
method parseWithTabs (line 2043) | def parseWithTabs( self ):
method ignore (line 2052) | def ignore( self, other ):
method setDebugActions (line 2075) | def setDebugActions( self, startAction, successAction, exceptionAction ):
method setDebug (line 2085) | def setDebug( self, flag=True ):
method __str__ (line 2126) | def __str__( self ):
method __repr__ (line 2129) | def __repr__( self ):
method streamline (line 2132) | def streamline( self ):
method checkRecursion (line 2137) | def checkRecursion( self, parseElementList ):
method validate (line 2140) | def validate( self, validateTrace=[] ):
method parseFile (line 2146) | def parseFile( self, file_or_filename, parseAll=False ):
method __eq__ (line 2166) | def __eq__(self,other):
method __ne__ (line 2174) | def __ne__(self,other):
method __hash__ (line 2177) | def __hash__(self):
method __req__ (line 2180) | def __req__(self,other):
method __rne__ (line 2183) | def __rne__(self,other):
method matches (line 2186) | def matches(self, testString, parseAll=True):
method runTests (line 2205) | def runTests(self, tests, parseAll=True, comment='#', fullDump=True, p...
class Token (line 2337) | class Token(ParserElement):
method __init__ (line 2341) | def __init__( self ):
class Empty (line 2345) | class Empty(Token):
method __init__ (line 2349) | def __init__( self ):
class NoMatch (line 2356) | class NoMatch(Token):
method __init__ (line 2360) | def __init__( self ):
method parseImpl (line 2367) | def parseImpl( self, instring, loc, doActions=True ):
class Literal (line 2371) | class Literal(Token):
method __init__ (line 2385) | def __init__( self, matchString ):
method parseImpl (line 2404) | def parseImpl( self, instring, loc, doActions=True ):
class Keyword (line 2412) | class Keyword(Token):
method __init__ (line 2431) | def __init__( self, matchString, identChars=None, caseless=False ):
method parseImpl (line 2452) | def parseImpl( self, instring, loc, doActions=True ):
method copy (line 2466) | def copy(self):
method setDefaultKeywordChars (line 2472) | def setDefaultKeywordChars( chars ):
class CaselessLiteral (line 2477) | class CaselessLiteral(Literal):
method __init__ (line 2488) | def __init__( self, matchString ):
method parseImpl (line 2495) | def parseImpl( self, instring, loc, doActions=True ):
class CaselessKeyword (line 2500) | class CaselessKeyword(Keyword):
method __init__ (line 2509) | def __init__( self, matchString, identChars=None ):
method parseImpl (line 2512) | def parseImpl( self, instring, loc, doActions=True ):
class CloseMatch (line 2518) | class CloseMatch(Token):
method __init__ (line 2543) | def __init__(self, match_string, maxMismatches=1):
method parseImpl (line 2552) | def parseImpl( self, instring, loc, doActions=True ):
class Word (line 2579) | class Word(Token):
method __init__ (line 2626) | def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, ...
method parseImpl (line 2680) | def parseImpl( self, instring, loc, doActions=True ):
method __str__ (line 2715) | def __str__( self ):
class Regex (line 2738) | class Regex(Token):
method __init__ (line 2752) | def __init__( self, pattern, flags=0):
method parseImpl (line 2786) | def parseImpl( self, instring, loc, doActions=True ):
method __str__ (line 2799) | def __str__( self ):
class QuotedString (line 2811) | class QuotedString(Token):
method __init__ (line 2836) | def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=...
method parseImpl (line 2901) | def parseImpl( self, instring, loc, doActions=True ):
method __str__ (line 2936) | def __str__( self ):
class CharsNotIn (line 2948) | class CharsNotIn(Token):
method __init__ (line 2964) | def __init__( self, notChars, min=1, max=0, exact=0 ):
method parseImpl (line 2988) | def parseImpl( self, instring, loc, doActions=True ):
method __str__ (line 3005) | def __str__( self ):
class White (line 3019) | class White(Token):
method __init__ (line 3034) | def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
method parseImpl (line 3054) | def parseImpl( self, instring, loc, doActions=True ):
class _PositionToken (line 3070) | class _PositionToken(Token):
method __init__ (line 3071) | def __init__( self ):
class GoToColumn (line 3077) | class GoToColumn(_PositionToken):
method __init__ (line 3081) | def __init__( self, colno ):
method preParse (line 3085) | def preParse( self, instring, loc ):
method parseImpl (line 3094) | def parseImpl( self, instring, loc, doActions=True ):
class LineStart (line 3103) | class LineStart(_PositionToken):
method __init__ (line 3124) | def __init__( self ):
method parseImpl (line 3128) | def parseImpl( self, instring, loc, doActions=True ):
class LineEnd (line 3133) | class LineEnd(_PositionToken):
method __init__ (line 3137) | def __init__( self ):
method parseImpl (line 3142) | def parseImpl( self, instring, loc, doActions=True ):
class StringStart (line 3153) | class StringStart(_PositionToken):
method __init__ (line 3157) | def __init__( self ):
method parseImpl (line 3161) | def parseImpl( self, instring, loc, doActions=True ):
class StringEnd (line 3168) | class StringEnd(_PositionToken):
method __init__ (line 3172) | def __init__( self ):
method parseImpl (line 3176) | def parseImpl( self, instring, loc, doActions=True ):
class WordStart (line 3186) | class WordStart(_PositionToken):
method __init__ (line 3194) | def __init__(self, wordChars = printables):
method parseImpl (line 3199) | def parseImpl(self, instring, loc, doActions=True ):
class WordEnd (line 3206) | class WordEnd(_PositionToken):
method __init__ (line 3214) | def __init__(self, wordChars = printables):
method parseImpl (line 3220) | def parseImpl(self, instring, loc, doActions=True ):
class ParseExpression (line 3229) | class ParseExpression(ParserElement):
method __init__ (line 3233) | def __init__( self, exprs, savelist = False ):
method __getitem__ (line 3253) | def __getitem__( self, i ):
method append (line 3256) | def append( self, other ):
method leaveWhitespace (line 3261) | def leaveWhitespace( self ):
method ignore (line 3270) | def ignore( self, other ):
method __str__ (line 3282) | def __str__( self ):
method streamline (line 3292) | def streamline( self ):
method setResultsName (line 3326) | def setResultsName( self, name, listAllMatches=False ):
method validate (line 3330) | def validate( self, validateTrace=[] ):
method copy (line 3336) | def copy(self):
class And (line 3341) | class And(ParseExpression):
class _ErrorStop (line 3357) | class _ErrorStop(Empty):
method __init__ (line 3358) | def __init__(self, *args, **kwargs):
method __init__ (line 3363) | def __init__( self, exprs, savelist = True ):
method parseImpl (line 3370) | def parseImpl( self, instring, loc, doActions=True ):
method __iadd__ (line 3395) | def __iadd__(self, other ):
method checkRecursion (line 3400) | def checkRecursion( self, parseElementList ):
method __str__ (line 3407) | def __str__( self ):
class Or (line 3417) | class Or(ParseExpression):
method __init__ (line 3431) | def __init__( self, exprs, savelist = False ):
method parseImpl (line 3438) | def parseImpl( self, instring, loc, doActions=True ):
method __ixor__ (line 3476) | def __ixor__(self, other ):
method __str__ (line 3481) | def __str__( self ):
method checkRecursion (line 3490) | def checkRecursion( self, parseElementList ):
class MatchFirst (line 3496) | class MatchFirst(ParseExpression):
method __init__ (line 3513) | def __init__( self, exprs, savelist = False ):
method parseImpl (line 3520) | def parseImpl( self, instring, loc, doActions=True ):
method __ior__ (line 3544) | def __ior__(self, other ):
method __str__ (line 3549) | def __str__( self ):
method checkRecursion (line 3558) | def checkRecursion( self, parseElementList ):
class Each (line 3564) | class Each(ParseExpression):
method __init__ (line 3618) | def __init__( self, exprs, savelist = True ):
method parseImpl (line 3624) | def parseImpl( self, instring, loc, doActions=True ):
method __str__ (line 3673) | def __str__( self ):
method checkRecursion (line 3682) | def checkRecursion( self, parseElementList ):
class ParseElementEnhance (line 3688) | class ParseElementEnhance(ParserElement):
method __init__ (line 3692) | def __init__( self, expr, savelist=False ):
method parseImpl (line 3710) | def parseImpl( self, instring, loc, doActions=True ):
method leaveWhitespace (line 3716) | def leaveWhitespace( self ):
method ignore (line 3723) | def ignore( self, other ):
method streamline (line 3735) | def streamline( self ):
method checkRecursion (line 3741) | def checkRecursion( self, parseElementList ):
method validate (line 3748) | def validate( self, validateTrace=[] ):
method __str__ (line 3754) | def __str__( self ):
class FollowedBy (line 3765) | class FollowedBy(ParseElementEnhance):
method __init__ (line 3782) | def __init__( self, expr ):
method parseImpl (line 3786) | def parseImpl( self, instring, loc, doActions=True ):
class NotAny (line 3791) | class NotAny(ParseElementEnhance):
method __init__ (line 3802) | def __init__( self, expr ):
method parseImpl (line 3809) | def parseImpl( self, instring, loc, doActions=True ):
method __str__ (line 3814) | def __str__( self ):
class _MultipleMatch (line 3823) | class _MultipleMatch(ParseElementEnhance):
method __init__ (line 3824) | def __init__( self, expr, stopOn=None):
method parseImpl (line 3832) | def parseImpl( self, instring, loc, doActions=True ):
class OneOrMore (line 3861) | class OneOrMore(_MultipleMatch):
method __str__ (line 3887) | def __str__( self ):
class ZeroOrMore (line 3896) | class ZeroOrMore(_MultipleMatch):
method __init__ (line 3908) | def __init__( self, expr, stopOn=None):
method parseImpl (line 3912) | def parseImpl( self, instring, loc, doActions=True ):
method __str__ (line 3918) | def __str__( self ):
class _NullToken (line 3927) | class _NullToken(object):
method __bool__ (line 3928) | def __bool__(self):
method __str__ (line 3931) | def __str__(self):
class Optional (line 3935) | class Optional(ParseElementEnhance):
method __init__ (line 3970) | def __init__( self, expr, default=_optionalNotMatched ):
method parseImpl (line 3976) | def parseImpl( self, instring, loc, doActions=True ):
method __str__ (line 3990) | def __str__( self ):
class SkipTo (line 3999) | class SkipTo(ParseElementEnhance):
method __init__ (line 4054) | def __init__( self, other, include=False, ignore=None, failOn=None ):
method parseImpl (line 4067) | def parseImpl( self, instring, loc, doActions=True ):
class Forward (line 4114) | class Forward(ParseElementEnhance):
method __init__ (line 4133) | def __init__( self, other=None ):
method __lshift__ (line 4136) | def __lshift__( self, other ):
method __ilshift__ (line 4149) | def __ilshift__(self, other):
method leaveWhitespace (line 4152) | def leaveWhitespace( self ):
method streamline (line 4156) | def streamline( self ):
method validate (line 4163) | def validate( self, validateTrace=[] ):
method __str__ (line 4170) | def __str__( self ):
method copy (line 4187) | def copy(self):
class _ForwardNoRecurse (line 4195) | class _ForwardNoRecurse(Forward):
method __str__ (line 4196) | def __str__( self ):
class TokenConverter (line 4199) | class TokenConverter(ParseElementEnhance):
method __init__ (line 4203) | def __init__( self, expr, savelist=False ):
class Combine (line 4207) | class Combine(TokenConverter):
method __init__ (line 4224) | def __init__( self, expr, joinString="", adjacent=True ):
method ignore (line 4234) | def ignore( self, other ):
method postParse (line 4241) | def postParse( self, instring, loc, tokenlist ):
class Group (line 4251) | class Group(TokenConverter):
method __init__ (line 4265) | def __init__( self, expr ):
method postParse (line 4269) | def postParse( self, instring, loc, tokenlist ):
class Dict (line 4272) | class Dict(TokenConverter):
method __init__ (line 4308) | def __init__( self, expr ):
method postParse (line 4312) | def postParse( self, instring, loc, tokenlist ):
class Suppress (line 4337) | class Suppress(TokenConverter):
method postParse (line 4356) | def postParse( self, instring, loc, tokenlist ):
method suppress (line 4359) | def suppress( self ):
class OnlyOnce (line 4363) | class OnlyOnce(object):
method __init__ (line 4367) | def __init__(self, methodCall):
method __call__ (line 4370) | def __call__(self,s,l,t):
method reset (line 4376) | def reset(self):
function traceParseAction (line 4379) | def traceParseAction(f):
function delimitedList (line 4423) | def delimitedList( expr, delim=",", combine=False ):
function countedArray (line 4442) | def countedArray( expr, intExpr=None ):
function _flatten (line 4473) | def _flatten(L):
function matchPreviousLiteral (line 4482) | def matchPreviousLiteral(expr):
function matchPreviousExpr (line 4510) | def matchPreviousExpr(expr):
function _escapeRegexRangeChars (line 4538) | def _escapeRegexRangeChars(s):
function oneOf (line 4546) | def oneOf( strs, caseless=False, useRegex=True ):
function dictOf (line 4619) | def dictOf( key, value ):
function originalTextFor (line 4654) | def originalTextFor(expr, asString=True):
function ungroup (line 4691) | def ungroup(expr):
function locatedExpr (line 4698) | def locatedExpr(expr):
function srange (line 4736) | def srange(s):
function matchOnlyAtCol (line 4760) | def matchOnlyAtCol(n):
function replaceWith (line 4770) | def replaceWith(replStr):
function removeQuotes (line 4784) | def removeQuotes(s,l,t):
function tokenMap (line 4798) | def tokenMap(func, *args):
function _makeTags (line 4848) | def _makeTags(tagStr, xml):
function makeHTMLTags (line 4877) | def makeHTMLTags(tagStr):
function makeXMLTags (line 4896) | def makeXMLTags(tagStr):
function withAttribute (line 4905) | def withAttribute(*args,**attrDict):
function withClass (line 4970) | def withClass(classname, namespace=''):
function infixNotation (line 5009) | def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress('...
function nestedExpr (line 5130) | def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedSt...
function indentedBlock (line 5220) | def indentedBlock(blockStatementExpr, indentStack, indent=True):
function replaceHTMLEntity (line 5340) | def replaceHTMLEntity(t):
class pyparsing_common (line 5372) | class pyparsing_common:
method convertToDate (line 5566) | def convertToDate(fmt="%Y-%m-%d"):
method convertToDatetime (line 5588) | def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
method stripHTMLTags (line 5620) | def stripHTMLTags(s, l, tokens):
FILE: examples/applications/plot_cyclical_feature_engineering.py
function evaluate (line 222) | def evaluate(model, X, y, cv):
function sin_transformer (line 354) | def sin_transformer(period):
function cos_transformer (line 358) | def cos_transformer(period):
function periodic_spline_transformer (line 431) | def periodic_spline_transformer(period, n_splines=None, degree=3):
FILE: examples/applications/plot_digits_denoising.py
function plot_digits (line 73) | def plot_digits(X, title):
FILE: examples/applications/plot_face_recognition.py
function plot_gallery (line 134) | def plot_gallery(images, titles, h, w, n_row=3, n_col=4):
function title (line 149) | def title(y_pred, y_test, target_names, i):
FILE: examples/applications/plot_model_complexity_influence.py
function generate_data (line 71) | def generate_data(case):
function benchmark_influence (line 102) | def benchmark_influence(conf):
function _count_nonzero_coefficients (line 154) | def _count_nonzero_coefficients(estimator):
function plot_influence (line 228) | def plot_influence(conf, mse_values, prediction_times, complexities):
FILE: examples/applications/plot_out_of_core_classification.py
function _not_in_sphinx (line 43) | def _not_in_sphinx():
class ReutersParser (line 57) | class ReutersParser(HTMLParser):
method __init__ (line 60) | def __init__(self, encoding="latin-1"):
method handle_starttag (line 65) | def handle_starttag(self, tag, attrs):
method handle_endtag (line 69) | def handle_endtag(self, tag):
method _reset (line 73) | def _reset(self):
method parse (line 83) | def parse(self, fd):
method handle_data (line 92) | def handle_data(self, data):
method start_reuters (line 100) | def start_reuters(self, attributes):
method end_reuters (line 103) | def end_reuters(self):
method start_title (line 110) | def start_title(self, attributes):
method end_title (line 113) | def end_title(self):
method start_body (line 116) | def start_body(self, attributes):
method end_body (line 119) | def end_body(self):
method start_topics (line 122) | def start_topics(self, attributes):
method end_topics (line 125) | def end_topics(self):
method start_d (line 128) | def start_d(self, attributes):
method end_d (line 131) | def end_d(self):
function stream_reuters_documents (line 137) | def stream_reuters_documents(data_path=None):
function get_minibatch (line 212) | def get_minibatch(doc_iter, size, pos_class=positive_class):
function iter_minibatches (line 229) | def iter_minibatches(doc_iter, minibatch_size):
function progress (line 253) | def progress(cls_name, stats):
function plot_accuracy (line 336) | def plot_accuracy(x, y, x_legend):
function autolabel (line 389) | def autolabel(rectangles):
FILE: examples/applications/plot_prediction_latency.py
function _not_in_sphinx (line 36) | def _not_in_sphinx():
function atomic_benchmark_estimator (line 41) | def atomic_benchmark_estimator(estimator, X_test, verbose=False):
function bulk_benchmark_estimator (line 60) | def bulk_benchmark_estimator(estimator, X_test, n_bulk_repeats, verbose):
function benchmark_estimator (line 79) | def benchmark_estimator(estimator, X_test, n_bulk_repeats=30, verbose=Fa...
function generate_dataset (line 100) | def generate_dataset(n_train, n_test, n_features, noise=0.1, verbose=Fal...
function boxplot_runtimes (line 129) | def boxplot_runtimes(runtimes, pred_type, configuration):
function benchmark (line 172) | def benchmark(configuration):
function n_feature_influence (line 195) | def n_feature_influence(estimators, n_train, n_test, n_features, percent...
function plot_n_features_influence (line 227) | def plot_n_features_influence(percentiles, percentile):
function benchmark_throughputs (line 246) | def benchmark_throughputs(configuration, duration_secs=0.1):
function plot_benchmark_throughput (line 263) | def plot_benchmark_throughput(throughputs, configuration):
FILE: examples/applications/plot_species_distribution_modeling.py
function construct_grids (line 63) | def construct_grids(batch):
function create_species_bunch (line 90) | def create_species_bunch(species_name, train, test, coverages, xgrid, yg...
function plot_species_distribution (line 113) | def plot_species_distribution(
FILE: examples/applications/plot_tomography_l1_reconstruction.py
function _weights (line 50) | def _weights(x, dx=1, orig=0):
function _generate_center_coordinates (line 57) | def _generate_center_coordinates(l_x):
function build_projection_operator (line 65) | def build_projection_operator(l_x, n_dir):
function generate_synthetic_data (line 97) | def generate_synthetic_data():
FILE: examples/applications/plot_topics_extraction_with_nmf_lda.py
function plot_top_words (line 42) | def plot_top_words(model, feature_names, n_top_words, title):
FILE: examples/applications/svm_gui.py
class Model (line 47) | class Model:
method __init__ (line 53) | def __init__(self):
method changed (line 60) | def changed(self, event):
method add_observer (line 65) | def add_observer(self, observer):
method set_surface (line 69) | def set_surface(self, surface):
method dump_svmlight_file (line 72) | def dump_svmlight_file(self, file):
class Controller (line 79) | class Controller:
method __init__ (line 80) | def __init__(self, model):
method fit (line 87) | def fit(self):
method decision_surface (line 124) | def decision_surface(self, cls):
method clear_data (line 133) | def clear_data(self):
method add_example (line 138) | def add_example(self, x, y, label):
method refit (line 145) | def refit(self):
class View (line 151) | class View:
method __init__ (line 154) | def __init__(self, root, controller):
method plot_kernels (line 181) | def plot_kernels(self):
method onclick (line 186) | def onclick(self, event):
method update_example (line 193) | def update_example(self, model, idx):
method update (line 201) | def update(self, event, model):
method remove_surface (line 224) | def remove_surface(self):
method plot_support_vectors (line 235) | def plot_support_vectors(self, support_vectors):
method plot_decision_surface (line 248) | def plot_decision_surface(self, surface, type):
class ControllBar (line 270) | class ControllBar:
method __init__ (line 271) | def __init__(self, root, controller):
function get_parser (line 353) | def get_parser():
function main (line 367) | def main(argv):
FILE: examples/applications/wikipedia_principal_eigenvector.py
function index (line 75) | def index(redirects, index_map, k):
function short_name (line 85) | def short_name(nt_uri):
function get_redirects (line 90) | def get_redirects(redirects_filename):
function get_adjacency_matrix (line 122) | def get_adjacency_matrix(redirects_filename, page_links_filename, limit=...
function centrality_scores (line 181) | def centrality_scores(X, alpha=0.85, max_iter=100, tol=1e-10):
FILE: examples/bicluster/plot_bicluster_newsgroups.py
function number_normalizer (line 39) | def number_normalizer(tokens):
class NumberNormalizingVectorizer (line 49) | class NumberNormalizingVectorizer(TfidfVectorizer):
method build_tokenizer (line 50) | def build_tokenizer(self):
function bicluster_ncut (line 112) | def bicluster_ncut(i):
function most_common (line 127) | def most_common(d):
FILE: examples/calibration/plot_calibration_curve.py
class NaivelyCalibratedLinearSVC (line 203) | class NaivelyCalibratedLinearSVC(LinearSVC):
method fit (line 207) | def fit(self, X, y):
method predict_proba (line 213) | def predict_proba(self, X):
FILE: examples/calibration/plot_compare_calibration.py
class NaivelyCalibratedLinearSVC (line 65) | class NaivelyCalibratedLinearSVC(LinearSVC):
method fit (line 69) | def fit(self, X, y):
method predict_proba (line 75) | def predict_proba(self, X):
FILE: examples/classification/plot_lda.py
function generate_data (line 26) | def generate_data(n_samples, n_features):
FILE: examples/classification/plot_lda_qda.py
function dataset_fixed_cov (line 38) | def dataset_fixed_cov():
function dataset_cov (line 51) | def dataset_cov():
function plot_data (line 66) | def plot_data(lda, X, y, y_pred, fig_index):
function plot_ellipse (line 125) | def plot_ellipse(splot, mean, cov, color):
function plot_lda_cov (line 147) | def plot_lda_cov(lda, splot):
function plot_qda_cov (line 152) | def plot_qda_cov(qda, splot):
FILE: examples/cluster/plot_adjusted_for_chance_measures.py
function uniform_labelings_scores (line 33) | def uniform_labelings_scores(
function ami_score (line 59) | def ami_score(U, V):
FILE: examples/cluster/plot_agglomerative_clustering_metrics.py
function sqr (line 53) | def sqr(x):
FILE: examples/cluster/plot_agglomerative_dendrogram.py
function plot_dendrogram (line 20) | def plot_dendrogram(model, **kwargs):
FILE: examples/cluster/plot_color_quantization.py
function recreate_image (line 72) | def recreate_image(codebook, labels, w, h):
FILE: examples/cluster/plot_digits_linkage.py
function plot_clustering (line 48) | def plot_clustering(X_red, labels, title=None):
FILE: examples/cluster/plot_inductive_clustering.py
function _classifier_has (line 40) | def _classifier_has(attr):
class InductiveClusterer (line 53) | class InductiveClusterer(BaseEstimator):
method __init__ (line 54) | def __init__(self, clusterer, classifier):
method fit (line 58) | def fit(self, X, y=None):
method predict (line 66) | def predict(self, X):
method decision_function (line 71) | def decision_function(self, X):
function plot_scatter (line 76) | def plot_scatter(X, color, alpha=0.5):
FILE: examples/cluster/plot_kmeans_digits.py
function bench_k_means (line 61) | def bench_k_means(kmeans, name, data, labels):
FILE: examples/cluster/plot_kmeans_stability_low_dim_dense.py
function make_data (line 55) | def make_data(random_state, n_samples_per_center, grid_size, scale):
FILE: examples/compose/plot_column_transformer.py
function subject_body_extractor (line 81) | def subject_body_extractor(posts):
function text_stats (line 110) | def text_stats(posts):
FILE: examples/datasets/plot_random_multilabel_dataset.py
function plot_2d (line 61) | def plot_2d(ax, n_labels=1, n_classes=3, length=50):
FILE: examples/decomposition/plot_faces_decomposition.py
function plot_gallery (line 47) | def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gr...
FILE: examples/decomposition/plot_ica_vs_pca.py
function plot_samples (line 64) | def plot_samples(S, axis_list=None):
FILE: examples/decomposition/plot_image_denoising.py
function show_with_diff (line 105) | def show_with_diff(image, reference, title):
FILE: examples/decomposition/plot_pca_3d.py
function pdf (line 33) | def pdf(x):
function plot_figs (line 57) | def plot_figs(fig_num, elev, azim):
FILE: examples/decomposition/plot_pca_vs_fa_model_selection.py
function compute_scores (line 63) | def compute_scores(X):
function shrunk_cov_score (line 77) | def shrunk_cov_score(X):
function lw_score (line 83) | def lw_score(X):
FILE: examples/decomposition/plot_sparse_coding.py
function ricker_function (line 26) | def ricker_function(resolution, center, width):
function ricker_matrix (line 37) | def ricker_matrix(width, resolution, n_components):
FILE: examples/ensemble/plot_bias_variance.py
function f (line 94) | def f(x):
function generate (line 100) | def generate(n_samples, noise, n_repeat=1):
FILE: examples/ensemble/plot_feature_transformation.py
function rf_apply (line 111) | def rf_apply(X, model):
function gbdt_apply (line 126) | def gbdt_apply(X, model):
FILE: examples/ensemble/plot_gradient_boosting_categorical.py
function plot_results (line 143) | def plot_results(figure_title):
FILE: examples/ensemble/plot_gradient_boosting_early_stopping.py
function autolabel (line 113) | def autolabel(rects, n_estimators):
FILE: examples/ensemble/plot_gradient_boosting_oob.py
function heldout_score (line 75) | def heldout_score(clf, X_test, y_test):
function cv_estimate (line 83) | def cv_estimate(n_splits=None):
FILE: examples/ensemble/plot_gradient_boosting_quantile.py
function f (line 18) | def f(x):
function highlight_min (line 132) | def highlight_min(x):
function coverage_fraction (line 197) | def coverage_fraction(y, y_low, y_high):
FILE: examples/ensemble/plot_stack_predictors.py
function load_ames_housing (line 51) | def load_ames_housing():
function plot_regression_results (line 219) | def plot_regression_results(ax, y_true, y_pred, title, scores, elapsed_t...
FILE: examples/gaussian_process/plot_gpc_isoprobability.py
function g (line 29) | def g(x):
FILE: examples/gaussian_process/plot_gpr_noisy.py
function target_generator (line 26) | def target_generator(X, add_noise=False):
FILE: examples/gaussian_process/plot_gpr_on_structured_data.py
class SequenceKernel (line 50) | class SequenceKernel(GenericKernelMixin, Kernel):
method __init__ (line 55) | def __init__(self, baseline_similarity=0.5, baseline_similarity_bounds...
method hyperparameter_baseline_similarity (line 60) | def hyperparameter_baseline_similarity(self):
method _f (line 65) | def _f(self, s1, s2):
method _g (line 73) | def _g(self, s1, s2):
method __call__ (line 79) | def __call__(self, X, Y=None, eval_gradient=False):
method diag (line 91) | def diag(self, X):
method is_stationary (line 94) | def is_stationary(self):
method clone_with_theta (line 97) | def clone_with_theta(self, theta):
FILE: examples/gaussian_process/plot_gpr_prior_posterior.py
function plot_gpr_samples (line 37) | def plot_gpr_samples(gpr_model, n_samples, ax):
FILE: examples/impute/plot_missing_values.py
function add_missing_values (line 59) | def add_missing_values(X_full, y_full):
function get_scores_for_imputer (line 113) | def get_scores_for_imputer(imputer, X_missing, y_missing):
function get_full_score (line 135) | def get_full_score(X_full, y_full):
function get_impute_zero_score (line 156) | def get_impute_zero_score(X_missing, y_missing):
function get_impute_knn_score (line 182) | def get_impute_knn_score(X_missing, y_missing):
function get_impute_mean (line 203) | def get_impute_mean(X_missing, y_missing):
function get_impute_iterative (line 229) | def get_impute_iterative(X_missing, y_missing):
FILE: examples/linear_model/plot_ard.py
function f (line 98) | def f(x, noise_amount):
FILE: examples/linear_model/plot_bayesian_ridge.py
function f (line 93) | def f(x, noise_amount):
FILE: examples/linear_model/plot_bayesian_ridge_curvefit.py
function func (line 37) | def func(x):
FILE: examples/linear_model/plot_lasso_model_selection.py
function plot_ic_criterion (line 82) | def plot_ic_criterion(model, name, color):
FILE: examples/linear_model/plot_logistic_multinomial.py
function plot_hyperplane (line 64) | def plot_hyperplane(c, color):
FILE: examples/linear_model/plot_ols_3d.py
function plot_figs (line 38) | def plot_figs(fig_num, elev, azim, X_train, clf):
FILE: examples/linear_model/plot_poisson_regression_non_normal_loss.py
function score_estimator (line 160) | def score_estimator(estimator, df_test):
function _mean_frequency_by_risk_group (line 389) | def _mean_frequency_by_risk_group(y_true, y_pred, sample_weight=None, n_...
function lorenz_curve (line 491) | def lorenz_curve(y_true, y_pred, exposure):
FILE: examples/linear_model/plot_polynomial_interpolation.py
function f (line 58) | def f(x):
function g (line 172) | def g(x):
FILE: examples/linear_model/plot_sgd_early_stopping.py
function load_mnist (line 59) | def load_mnist(n_samples=None, class_0="0", class_1="8"):
function fit_and_score (line 74) | def fit_and_score(estimator, max_iter, X_train, X_test, y_train, y_test):
FILE: examples/linear_model/plot_sgd_iris.py
function plot_hyperplane (line 77) | def plot_hyperplane(c, color):
FILE: examples/linear_model/plot_sgd_loss_functions.py
function modified_huber_loss (line 15) | def modified_huber_loss(y_true, y_pred):
FILE: examples/linear_model/plot_tweedie_regression_insurance_claims.py
function load_mtpl2 (line 65) | def load_mtpl2(n_samples=100000):
function plot_obs_pred (line 94) | def plot_obs_pred(
function score_estimator (line 150) | def score_estimator(
function lorenz_curve (line 573) | def lorenz_curve(y_true, y_pred, exposure):
FILE: examples/manifold/plot_lle_digits.py
function plot_embedding (line 51) | def plot_embedding(X, title, ax):
FILE: examples/miscellaneous/plot_multilabel.py
function plot_hyperplane (line 45) | def plot_hyperplane(clf, min_x, max_x, linestyle, label):
function plot_subfigure (line 54) | def plot_subfigure(X, Y, subplot, title, transform):
FILE: examples/mixture/plot_concentration_prior.py
function plot_ellipses (line 43) | def plot_ellipses(ax, weights, means, covars):
function plot_results (line 61) | def plot_results(ax1, ax2, estimator, X, y, title, plot_title=False):
FILE: examples/mixture/plot_gmm.py
function plot_results (line 39) | def plot_results(X, Y_, means, covariances, index, title):
FILE: examples/mixture/plot_gmm_covariances.py
function make_ellipses (line 46) | def make_ellipses(gmm, ax):
FILE: examples/mixture/plot_gmm_sin.py
function plot_results (line 54) | def plot_results(X, Y, means, covariances, index, title):
function plot_samples (line 82) | def plot_samples(X, Y, n_components, index, title):
FILE: examples/model_selection/plot_cv_indices.py
function visualize_groups (line 59) | def visualize_groups(classes, groups, name):
function plot_cv_indices (line 98) | def plot_cv_indices(cv, X, y, group, ax, n_splits, lw=10):
FILE: examples/model_selection/plot_grid_search_refit_callable.py
function lower_bound (line 33) | def lower_bound(cv_results):
function best_low_complexity (line 57) | def best_low_complexity(cv_results):
FILE: examples/model_selection/plot_grid_search_stats.py
function corrected_std (line 168) | def corrected_std(differences, n_train, n_test):
function compute_corrected_ttest (line 193) | def compute_corrected_ttest(differences, df, n_train, n_test):
FILE: examples/model_selection/plot_learning_curve.py
function plot_learning_curve (line 29) | def plot_learning_curve(
FILE: examples/model_selection/plot_randomized_search.py
function report (line 41) | def report(results, n_top=3):
FILE: examples/model_selection/plot_successive_halving_heatmap.py
function make_heatmap (line 55) | def make_heatmap(ax, gs, is_sh=False, make_cbar=False):
FILE: examples/model_selection/plot_underfitting_overfitting.py
function true_fun (line 32) | def true_fun(X):
FILE: examples/neighbors/approximate_nearest_neighbors.py
class NMSlibTransformer (line 72) | class NMSlibTransformer(TransformerMixin, BaseEstimator):
method __init__ (line 75) | def __init__(self, n_neighbors=5, metric="euclidean", method="sw-graph...
method fit (line 81) | def fit(self, X):
method transform (line 98) | def transform(self, X):
class AnnoyTransformer (line 118) | class AnnoyTransformer(TransformerMixin, BaseEstimator):
method __init__ (line 121) | def __init__(self, n_neighbors=5, metric="euclidean", n_trees=10, sear...
method fit (line 127) | def fit(self, X):
method transform (line 135) | def transform(self, X):
method fit_transform (line 138) | def fit_transform(self, X, y=None):
method _transform (line 141) | def _transform(self, X):
function test_transformers (line 175) | def test_transformers():
function load_mnist (line 192) | def load_mnist(n_samples):
function run_benchmark (line 199) | def run_benchmark():
FILE: examples/neighbors/plot_kde_1d.py
function format_func (line 99) | def format_func(x, loc):
FILE: examples/neighbors/plot_nca_illustration.py
function link_thickness_i (line 53) | def link_thickness_i(X, i):
function relate_point (line 64) | def relate_point(X, i, ax):
FILE: examples/neighbors/plot_species_kde.py
function construct_grids (line 58) | def construct_grids(batch):
FILE: examples/neural_networks/plot_mlp_training_curves.py
function plot_on_dataset (line 92) | def plot_on_dataset(X, y, ax, name):
FILE: examples/neural_networks/plot_rbm_logistic_classification.py
function nudge_dataset (line 48) | def nudge_dataset(X, Y):
FILE: examples/preprocessing/plot_all_scaling.py
function create_axes (line 122) | def create_axes(title, figsize=(16, 6)):
function plot_distribution (line 165) | def plot_distribution(axes, X, y, hist_nbins=50, title="", x0_label="", ...
function make_plot (line 208) | def make_plot(item_idx):
FILE: examples/preprocessing/plot_discretization_classification.py
function get_name (line 55) | def get_name(estimator):
FILE: examples/release_highlights/plot_release_highlights_0_22_0.py
function test_sklearn_compatible_estimator (line 248) | def test_sklearn_compatible_estimator(estimator, check):
FILE: examples/semi_supervised/plot_semi_supervised_newsgroups.py
function eval_and_print_metrics (line 66) | def eval_and_print_metrics(clf, X_train, y_train, X_test, y_test):
FILE: examples/svm/plot_custom_kernel.py
function my_kernel (line 22) | def my_kernel(X, Y):
FILE: examples/svm/plot_iris_svc.py
function make_meshgrid (line 42) | def make_meshgrid(x, y, h=0.02):
function plot_contours (line 61) | def plot_contours(ax, clf, xx, yy, **params):
FILE: examples/svm/plot_rbf_parameters.py
class MidpointNormalize (line 93) | class MidpointNormalize(Normalize):
method __init__ (line 94) | def __init__(self, vmin=None, vmax=None, midpoint=None, clip=False):
method __call__ (line 98) | def __call__(self, value, clip=None):
FILE: examples/svm/plot_weighted_samples.py
function plot_decision_function (line 22) | def plot_decision_function(classifier, sample_weight, axis, title):
FILE: examples/text/plot_document_classification_20newsgroups.py
function is_interactive (line 101) | def is_interactive():
function size_mb (line 154) | def size_mb(docs):
function trim (line 213) | def trim(s):
function benchmark (line 223) | def benchmark(clf):
FILE: examples/text/plot_document_clustering.py
function is_interactive (line 125) | def is_interactive():
FILE: examples/text/plot_hashing_vs_dict_vectorizer.py
function n_nonzero_columns (line 33) | def n_nonzero_columns(X):
function tokens (line 38) | def tokens(doc):
function token_freqs (line 47) | def token_freqs(doc):
FILE: maint_tools/create_issue_from_juint.py
function get_issue (line 38) | def get_issue():
function create_or_update_issue (line 48) | def create_or_update_issue(body):
FILE: maint_tools/sort_whats_new.py
function entry_sort_key (line 12) | def entry_sort_key(s):
FILE: maint_tools/test_docstrings.py
function get_all_methods (line 223) | def get_all_methods():
function _is_checked_function (line 242) | def _is_checked_function(item):
function get_all_functions_names (line 256) | def get_all_functions_names():
function filter_errors (line 287) | def filter_errors(errors, method, Estimator=None):
function repr_errors (line 326) | def repr_errors(res, estimator=None, method: Optional[str] = None) -> str:
function test_function_docstring (line 382) | def test_function_docstring(function_name, request):
function test_docstring (line 400) | def test_docstring(Estimator, method, request):
FILE: setup.py
class CleanCommand (line 91) | class CleanCommand(Clean):
method run (line 94) | def run(self):
class build_ext_subclass (line 130) | class build_ext_subclass(build_ext):
method finalize_options (line 131) | def finalize_options(self):
method build_extensions (line 143) | def build_extensions(self):
function configuration (line 177) | def configuration(parent_package="", top_path=None):
function check_package_status (line 206) | def check_package_status(package, min_version):
function setup_package (line 246) | def setup_package():
FILE: sklearn/__check_build/__init__.py
function raise_build_error (line 16) | def raise_build_error(e):
FILE: sklearn/__check_build/setup.py
function configuration (line 7) | def configuration(parent_package="", top_path=None):
FILE: sklearn/__init__.py
function setup_module (line 132) | def setup_module(module):
FILE: sklearn/_build_utils/__init__.py
function _check_cython_version (line 22) | def _check_cython_version():
function cythonize_extensions (line 40) | def cythonize_extensions(top_path, config):
function gen_from_templates (line 90) | def gen_from_templates(templates):
FILE: sklearn/_build_utils/openmp_helpers.py
function get_openmp_flag (line 18) | def get_openmp_flag(compiler):
function check_openmp_support (line 47) | def check_openmp_support():
FILE: sklearn/_build_utils/pre_build_helpers.py
function _get_compiler (line 17) | def _get_compiler():
function compile_test_program (line 46) | def compile_test_program(code, extra_preargs=[], extra_postargs=[]):
function basic_check_build (line 100) | def basic_check_build():
FILE: sklearn/_config.py
function _get_threadlocal_config (line 16) | def _get_threadlocal_config():
function get_config (line 24) | def get_config():
function set_config (line 42) | def set_config(
function config_context (line 101) | def config_context(
FILE: sklearn/_loss/glm_distribution.py
class ExponentialDispersionModel (line 19) | class ExponentialDispersionModel(metaclass=ABCMeta):
method in_y_range (line 48) | def in_y_range(self, y):
method unit_variance (line 69) | def unit_variance(self, y_pred):
method unit_deviance (line 91) | def unit_deviance(self, y, y_pred, check_input=False):
method unit_deviance_derivative (line 116) | def unit_deviance_derivative(self, y, y_pred):
method deviance (line 134) | def deviance(self, y, y_pred, weights=1):
method deviance_derivative (line 158) | def deviance_derivative(self, y, y_pred, weights=1):
class TweedieDistribution (line 178) | class TweedieDistribution(ExponentialDispersionModel):
method __init__ (line 204) | def __init__(self, power=0):
method power (line 208) | def power(self):
method power (line 212) | def power(self, power):
method unit_variance (line 238) | def unit_variance(self, y_pred):
method unit_deviance (line 249) | def unit_deviance(self, y, y_pred, check_input=False):
class NormalDistribution (line 336) | class NormalDistribution(TweedieDistribution):
method __init__ (line 339) | def __init__(self):
class PoissonDistribution (line 343) | class PoissonDistribution(TweedieDistribution):
method __init__ (line 346) | def __init__(self):
class GammaDistribution (line 350) | class GammaDistribution(TweedieDistribution):
method __init__ (line 353) | def __init__(self):
class InverseGaussianDistribution (line 357) | class InverseGaussianDistribution(TweedieDistribution):
method __init__ (line 360) | def __init__(self):
FILE: sklearn/_loss/tests/test_glm_distribution.py
function test_family_bounds (line 33) | def test_family_bounds(family, expected):
function test_invalid_distribution_bound (line 39) | def test_invalid_distribution_bound():
function test_tweedie_distribution_power (line 46) | def test_tweedie_distribution_power():
function test_deviance_zero (line 81) | def test_deviance_zero(family, chk_values):
function test_deviance_derivative (line 102) | def test_deviance_derivative(family):
FILE: sklearn/base.py
function clone (line 33) | def clone(estimator, *, safe=True):
function _pprint (line 99) | def _pprint(params, offset=0, printer=repr):
class BaseEstimator (line 149) | class BaseEstimator:
method _get_param_names (line 160) | def _get_param_names(cls):
method get_params (line 190) | def get_params(self, deep=True):
method set_params (line 214) | def set_params(self, **params):
method __repr__ (line 258) | def __repr__(self, N_CHAR_MAX=700):
method __getstate__ (line 310) | def __getstate__(self):
method __setstate__ (line 321) | def __setstate__(self, state):
method _more_tags (line 341) | def _more_tags(self):
method _get_tags (line 344) | def _get_tags(self):
method _check_n_features (line 355) | def _check_n_features(self, X, reset):
method _check_feature_names (line 401) | def _check_feature_names(self, X, *, reset):
method _validate_data (line 491) | def _validate_data(
method _repr_html_ (line 601) | def _repr_html_(self):
method _repr_html_inner (line 616) | def _repr_html_inner(self):
method _repr_mimebundle_ (line 623) | def _repr_mimebundle_(self, **kwargs):
class ClassifierMixin (line 631) | class ClassifierMixin:
method score (line 636) | def score(self, X, y, sample_weight=None):
method _more_tags (line 664) | def _more_tags(self):
class RegressorMixin (line 668) | class RegressorMixin:
method score (line 673) | def score(self, X, y, sample_weight=None):
method _more_tags (line 719) | def _more_tags(self):
class ClusterMixin (line 723) | class ClusterMixin:
method fit_predict (line 728) | def fit_predict(self, X, y=None):
method _more_tags (line 750) | def _more_tags(self):
class BiclusterMixin (line 754) | class BiclusterMixin:
method biclusters_ (line 758) | def biclusters_(self):
method get_indices (line 765) | def get_indices(self, i):
method get_shape (line 786) | def get_shape(self, i):
method get_submatrix (line 805) | def get_submatrix(self, i, data):
class TransformerMixin (line 832) | class TransformerMixin:
method fit_transform (line 835) | def fit_transform(self, X, y=None, **fit_params):
class _OneToOneFeatureMixin (line 869) | class _OneToOneFeatureMixin:
method get_feature_names_out (line 876) | def get_feature_names_out(self, input_features=None):
class _ClassNamePrefixFeaturesOutMixin (line 898) | class _ClassNamePrefixFeaturesOutMixin:
method get_feature_names_out (line 904) | def get_feature_names_out(self, input_features=None):
class DensityMixin (line 923) | class DensityMixin:
method score (line 928) | def score(self, X, y=None):
class OutlierMixin (line 946) | class OutlierMixin:
method fit_predict (line 951) | def fit_predict(self, X, y=None):
class MetaEstimatorMixin (line 973) | class MetaEstimatorMixin:
class MultiOutputMixin (line 978) | class MultiOutputMixin:
method _more_tags (line 981) | def _more_tags(self):
class _UnstableArchMixin (line 985) | class _UnstableArchMixin:
method _more_tags (line 988) | def _more_tags(self):
function is_classifier (line 996) | def is_classifier(estimator):
function is_regressor (line 1012) | def is_regressor(estimator):
function is_outlier_detector (line 1028) | def is_outlier_detector(estimator):
function _is_pairwise (line 1044) | def _is_pairwise(estimator):
FILE: sklearn/calibration.py
class CalibratedClassifierCV (line 53) | class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEs...
method __init__ (line 238) | def __init__(
method fit (line 253) | def fit(self, X, y, sample_weight=None):
method predict_proba (line 402) | def predict_proba(self, X):
method predict (line 430) | def predict(self, X):
method _more_tags (line 449) | def _more_tags(self):
function _fit_classifier_calibrator_pair (line 461) | def _fit_classifier_calibrator_pair(
function _get_prediction_method (line 527) | def _get_prediction_method(clf):
function _compute_predictions (line 557) | def _compute_predictions(pred_method, method_name, X, n_classes):
function _fit_calibrator (line 595) | def _fit_calibrator(clf, predictions, y, classes, method, sample_weight=...
class _CalibratedClassifier (line 647) | class _CalibratedClassifier:
method __init__ (line 670) | def __init__(self, base_estimator, calibrators, *, classes, method="si...
method predict_proba (line 676) | def predict_proba(self, X):
function _sigmoid_calibration (line 728) | def _sigmoid_calibration(predictions, y, sample_weight=None):
class _SigmoidCalibration (line 798) | class _SigmoidCalibration(RegressorMixin, BaseEstimator):
method fit (line 810) | def fit(self, X, y, sample_weight=None):
method predict (line 836) | def predict(self, T):
function calibration_curve (line 853) | def calibration_curve(
class CalibrationDisplay (line 969) | class CalibrationDisplay:
method __init__ (line 1043) | def __init__(
method plot (line 1052) | def plot(self, *, ax=None, name=None, ref_line=True, **kwargs):
method from_estimator (line 1114) | def from_estimator(
method from_predictions (line 1240) | def from_predictions(
FILE: sklearn/cluster/_affinity_propagation.py
function _equal_similarities_and_preferences (line 24) | def _equal_similarities_and_preferences(S, preference):
function affinity_propagation (line 38) | def affinity_propagation(
class AffinityPropagation (line 267) | class AffinityPropagation(ClusterMixin, BaseEstimator):
method __init__ (line 397) | def __init__(
method _pairwise (line 426) | def _pairwise(self):
method _more_tags (line 429) | def _more_tags(self):
method fit (line 432) | def fit(self, X, y=None):
method predict (line 503) | def predict(self, X):
method fit_predict (line 536) | def fit_predict(self, X, y=None):
FILE: sklearn/cluster/_agglomerative.py
function _fix_connectivity (line 35) | def _fix_connectivity(X, connectivity, affinity):
function _single_linkage_tree (line 110) | def _single_linkage_tree(
function ward_tree (line 170) | def ward_tree(X, *, connectivity=None, n_clusters=None, return_distance=...
function linkage_tree (line 385) | def linkage_tree(
function _complete_linkage (line 663) | def _complete_linkage(*args, **kwargs):
function _average_linkage (line 668) | def _average_linkage(*args, **kwargs):
function _single_linkage (line 673) | def _single_linkage(*args, **kwargs):
function _hc_cut (line 689) | def _hc_cut(n_clusters, children, n_leaves):
class AgglomerativeClustering (line 740) | class AgglomerativeClustering(ClusterMixin, BaseEstimator):
method __init__ (line 878) | def __init__(
method fit (line 899) | def fit(self, X, y=None):
method _fit (line 920) | def _fit(self, X):
method fit_predict (line 1033) | def fit_predict(self, X, y=None):
class FeatureAgglomeration (line 1057) | class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransfo...
method __init__ (line 1196) | def __init__(
method fit (line 1221) | def fit(self, X, y=None):
method fit_predict (line 1242) | def fit_predict(self):
FILE: sklearn/cluster/_bicluster.py
function _scale_normalize (line 25) | def _scale_normalize(X):
function _bistochastic_normalize (line 46) | def _bistochastic_normalize(X, max_iter=1000, tol=1e-5):
function _log_normalize (line 67) | def _log_normalize(X):
class BaseSpectral (line 83) | class BaseSpectral(BiclusterMixin, BaseEstimator, metaclass=ABCMeta):
method __init__ (line 87) | def __init__(
method _check_parameters (line 105) | def _check_parameters(self):
method fit (line 114) | def fit(self, X, y=None):
method _svd (line 135) | def _svd(self, array, n_components, n_discard):
method _k_means (line 172) | def _k_means(self, data, n_clusters):
method _more_tags (line 192) | def _more_tags(self):
class SpectralCoclustering (line 206) | class SpectralCoclustering(BaseSpectral):
method __init__ (line 316) | def __init__(
method _fit (line 331) | def _fit(self, X):
class SpectralBiclustering (line 349) | class SpectralBiclustering(BaseSpectral):
method __init__ (line 474) | def __init__(
method _check_parameters (line 495) | def _check_parameters(self):
method _fit (line 536) | def _fit(self, X):
method _fit_best_piecewise (line 579) | def _fit_best_piecewise(self, vectors, n_best, n_clusters):
method _project_and_cluster (line 597) | def _project_and_cluster(self, data, vectors, n_clusters):
FILE: sklearn/cluster/_birch.py
function _iterate_sparse_X (line 23) | def _iterate_sparse_X(X):
function _split_node (line 41) | def _split_node(node, threshold, branching_factor):
class _CFNode (line 97) | class _CFNode:
method __init__ (line 145) | def __init__(self, *, threshold, branching_factor, is_leaf, n_features):
method append_subcluster (line 160) | def append_subcluster(self, subcluster):
method update_split_subclusters (line 172) | def update_split_subclusters(self, subcluster, new_subcluster1, new_su...
method insert_cf_subcluster (line 182) | def insert_cf_subcluster(self, subcluster):
class _CFSubcluster (line 250) | class _CFSubcluster:
method __init__ (line 286) | def __init__(self, *, linear_sum=None):
method update (line 299) | def update(self, subcluster):
method merge_subcluster (line 306) | def merge_subcluster(self, nominee_cluster, threshold):
method radius (line 338) | def radius(self):
class Birch (line 345) | class Birch(ClusterMixin, TransformerMixin, BaseEstimator):
method __init__ (line 465) | def __init__(
method fit_ (line 486) | def fit_(self):
method partial_fit_ (line 495) | def partial_fit_(self):
method fit (line 498) | def fit(self, X, y=None):
method _fit (line 543) | def _fit(self, X, partial):
method _get_leaves (line 606) | def _get_leaves(self):
method partial_fit (line 622) | def partial_fit(self, X=None, y=None):
method _check_fit (line 650) | def _check_fit(self, X):
method predict (line 661) | def predict(self, X):
method transform (line 687) | def transform(self, X):
method _global_clustering (line 709) | def _global_clustering(self, X=None):
FILE: sklearn/cluster/_dbscan.py
function dbscan (line 25) | def dbscan(
class DBSCAN (line 166) | class DBSCAN(ClusterMixin, BaseEstimator):
method __init__ (line 302) | def __init__(
method fit (line 323) | def fit(self, X, y=None, sample_weight=None):
method fit_predict (line 434) | def fit_predict(self, X, y=None, sample_weight=None):
FILE: sklearn/cluster/_feature_agglomeration.py
class AgglomerationTransform (line 18) | class AgglomerationTransform(TransformerMixin):
method transform (line 23) | def transform(self, X):
method inverse_transform (line 57) | def inverse_transform(self, Xred):
FILE: sklearn/cluster/_kmeans.py
function kmeans_plusplus (line 52) | def kmeans_plusplus(
function _kmeans_plusplus (line 148) | def _kmeans_plusplus(X, n_clusters, x_squared_norms, random_state, n_loc...
function _tolerance (line 246) | def _tolerance(X, tol):
function k_means (line 257) | def k_means(
function _kmeans_single_elkan (line 383) | def _kmeans_single_elkan(
function _kmeans_single_lloyd (line 544) | def _kmeans_single_lloyd(
function _labels_inertia (line 684) | def _labels_inertia(X, sample_weight, x_squared_norms, centers, n_thread...
function _labels_inertia_threadpool_limit (line 751) | def _labels_inertia_threadpool_limit(
class KMeans (line 763) | class KMeans(TransformerMixin, ClusterMixin, BaseEstimator):
method __init__ (line 911) | def __init__(
method _check_params (line 935) | def _check_params(self, X):
method _validate_center_shape (line 993) | def _validate_center_shape(self, X, centers):
method _check_test_data (line 1006) | def _check_test_data(self, X):
method _check_mkl_vcomp (line 1017) | def _check_mkl_vcomp(self, X, n_samples):
method _init_centroids (line 1052) | def _init_centroids(self, X, x_squared_norms, init, random_state, init...
method fit (line 1111) | def fit(self, X, y=None, sample_weight=None):
method fit_predict (line 1232) | def fit_predict(self, X, y=None, sample_weight=None):
method fit_transform (line 1257) | def fit_transform(self, X, y=None, sample_weight=None):
method transform (line 1281) | def transform(self, X):
method _transform (line 1303) | def _transform(self, X):
method predict (line 1307) | def predict(self, X, sample_weight=None):
method score (line 1338) | def score(self, X, y=None, sample_weight=None):
method _more_tags (line 1368) | def _more_tags(self):
function _mini_batch_step (line 1378) | def _mini_batch_step(
class MiniBatchKMeans (line 1502) | class MiniBatchKMeans(KMeans):
method __init__ (line 1687) | def __init__(
method counts_ (line 1725) | def counts_(self):
method init_size_ (line 1733) | def init_size_(self):
method random_state_ (line 1741) | def random_state_(self):
method _check_params (line 1744) | def _check_params(self, X):
method _mini_batch_convergence (line 1787) | def _mini_batch_convergence(
method _random_reassign (line 1852) | def _random_reassign(self):
method fit (line 1868) | def fit(self, X, y=None, sample_weight=None):
method partial_fit (line 2024) | def partial_fit(self, X, y=None, sample_weight=None):
method predict (line 2124) | def predict(self, X, sample_weight=None):
method _more_tags (line 2161) | def _more_tags(self):
FILE: sklearn/cluster/_mean_shift.py
function estimate_bandwidth (line 31) | def estimate_bandwidth(X, *, quantile=0.3, n_samples=None, random_state=...
function _mean_shift_single_seed (line 87) | def _mean_shift_single_seed(my_mean, X, nbrs, max_iter):
function mean_shift (line 110) | def mean_shift(
function get_bin_seeds (line 203) | def get_bin_seeds(X, bin_size, min_bin_freq=1):
class MeanShift (line 256) | class MeanShift(ClusterMixin, BaseEstimator):
method __init__ (line 383) | def __init__(
method fit (line 402) | def fit(self, X, y=None):
method predict (line 499) | def predict(self, X):
FILE: sklearn/cluster/_optics.py
class OPTICS (line 26) | class OPTICS(ClusterMixin, BaseEstimator):
method __init__ (line 224) | def __init__(
method fit (line 257) | def fit(self, X, y=None):
function _validate_size (line 348) | def _validate_size(size, n_samples, param_name):
function _compute_core_distances_ (line 362) | def _compute_core_distances_(X, neighbors, min_samples, working_memory):
function compute_optics_graph (line 398) | def compute_optics_graph(
function _set_reach_dist (line 578) | def _set_reach_dist(
function cluster_optics_dbscan (line 623) | def cluster_optics_dbscan(*, reachability, core_distances, ordering, eps):
function cluster_optics_xi (line 661) | def cluster_optics_xi(
function _extend_region (line 741) | def _extend_region(steep_point, xward_point, start, min_samples):
function _update_filter_sdas (line 804) | def _update_filter_sdas(sdas, mib, xi_complement, reachability_plot):
function _correct_predecessor (line 818) | def _correct_predecessor(reachability_plot, predecessor_plot, ordering, ...
function _xi_cluster (line 840) | def _xi_cluster(
function _extract_xi_labels (line 995) | def _extract_xi_labels(ordering, clusters):
FILE: sklearn/cluster/_spectral.py
function cluster_qr (line 25) | def cluster_qr(vectors):
function discretize (line 57) | def discretize(
function spectral_clustering (line 193) | def spectral_clustering(
class SpectralClustering (line 379) | class SpectralClustering(ClusterMixin, BaseEstimator):
method __init__ (line 590) | def __init__(
method fit (line 625) | def fit(self, X, y=None):
method fit_predict (line 704) | def fit_predict(self, X, y=None):
method _more_tags (line 728) | def _more_tags(self):
method _pairwise (line 741) | def _pairwise(self):
FILE: sklearn/cluster/setup.py
function configuration (line 8) | def configuration(parent_package="", top_path=None):
FILE: sklearn/cluster/tests/common.py
function generate_clustered_data (line 13) | def generate_clustered_data(
FILE: sklearn/cluster/tests/test_affinity_propagation.py
function test_affinity_propagation (line 31) | def test_affinity_propagation():
function test_affinity_propagation_affinity_shape (line 68) | def test_affinity_propagation_affinity_shape():
function test_affinity_propagation_params_validation (line 92) | def test_affinity_propagation_params_validation(input, params, err_type,...
function test_affinity_propagation_predict (line 98) | def test_affinity_propagation_predict():
function test_affinity_propagation_predict_error (line 106) | def test_affinity_propagation_predict_error():
function test_affinity_propagation_fit_non_convergence (line 121) | def test_affinity_propagation_fit_non_convergence():
function test_affinity_propagation_equal_mutual_similarities (line 136) | def test_affinity_propagation_equal_mutual_similarities():
function test_affinity_propagation_predict_non_convergence (line 168) | def test_affinity_propagation_predict_non_convergence():
function test_affinity_propagation_non_convergence_regressiontest (line 185) | def test_affinity_propagation_non_convergence_regressiontest():
function test_equal_similarities_and_preferences (line 191) | def test_equal_similarities_and_preferences():
function test_affinity_propagation_random_state (line 212) | def test_affinity_propagation_random_state():
function test_affinity_propagation_convergence_warning_dense_sparse (line 233) | def test_affinity_propagation_convergence_warning_dense_sparse(centers):
function test_affinity_propagation_float32 (line 246) | def test_affinity_propagation_float32():
function test_sparse_input_for_predict (line 259) | def test_sparse_input_for_predict():
function test_sparse_input_for_fit_predict (line 268) | def test_sparse_input_for_fit_predict():
function test_affinity_propagation_pairwise_is_deprecated (line 279) | def test_affinity_propagation_pairwise_is_deprecated():
FILE: sklearn/cluster/tests/test_bicluster.py
class MockBiclustering (line 26) | class MockBiclustering(BiclusterMixin, BaseEstimator):
method __init__ (line 28) | def __init__(self):
method get_indices (line 31) | def get_indices(self, i):
function test_get_submatrix (line 39) | def test_get_submatrix():
function _test_shape_indices (line 54) | def _test_shape_indices(model):
function test_spectral_coclustering (line 63) | def test_spectral_coclustering():
function test_spectral_biclustering (line 91) | def test_spectral_biclustering():
function _do_scale_test (line 131) | def _do_scale_test(scaled):
function _do_bistochastic_test (line 142) | def _do_bistochastic_test(scaled):
function test_scale_normalize (line 148) | def test_scale_normalize():
function test_bistochastic_normalize (line 158) | def test_bistochastic_normalize():
function test_log_normalize (line 168) | def test_log_normalize():
function test_fit_best_piecewise (line 177) | def test_fit_best_piecewise():
function test_project_and_cluster (line 184) | def test_project_and_cluster():
function test_perfect_checkerboard (line 193) | def test_perfect_checkerboard():
function test_errors (line 223) | def test_errors(args):
function test_wrong_shape (line 231) | def test_wrong_shape():
function test_n_features_in_ (line 239) | def test_n_features_in_(est):
FILE: sklearn/cluster/tests/test_birch.py
function test_n_samples_leaves_roots (line 22) | def test_n_samples_leaves_roots():
function test_partial_fit (line 35) | def test_partial_fit():
function test_birch_predict (line 52) | def test_birch_predict():
function test_n_clusters (line 69) | def test_n_clusters():
function test_sparse_X (line 98) | def test_sparse_X():
function test_partial_fit_second_call_error_checks (line 112) | def test_partial_fit_second_call_error_checks():
function check_branching_factor (line 124) | def check_branching_factor(node, branching_factor):
function test_branching_factor (line 132) | def test_branching_factor():
function check_threshold (line 146) | def check_threshold(birch_instance, threshold):
function test_threshold (line 156) | def test_threshold():
function test_birch_n_clusters_long_int (line 168) | def test_birch_n_clusters_long_int():
function test_birch_fit_attributes_deprecated (line 178) | def test_birch_fit_attributes_deprecated(attribute):
function test_birch_params_validation (line 217) | def test_birch_params_validation(params, err_type, err_msg):
FILE: sklearn/cluster/tests/test_dbscan.py
function test_dbscan_similarity (line 28) | def test_dbscan_similarity():
function test_dbscan_feature (line 52) | def test_dbscan_feature():
function test_dbscan_sparse (line 74) | def test_dbscan_sparse():
function test_dbscan_sparse_precomputed (line 82) | def test_dbscan_sparse_precomputed(include_self):
function test_dbscan_sparse_precomputed_different_eps (line 97) | def test_dbscan_sparse_precomputed_different_eps():
function test_dbscan_input_not_modified (line 116) | def test_dbscan_input_not_modified(use_sparse, metric):
function test_dbscan_no_core_samples (line 129) | def test_dbscan_no_core_samples():
function test_dbscan_callable (line 141) | def test_dbscan_callable():
function test_dbscan_metric_params (line 166) | def test_dbscan_metric_params():
function test_dbscan_balltree (line 227) | def test_dbscan_balltree():
function test_input_validation (line 266) | def test_input_validation():
function test_dbscan_badargs (line 279) | def test_dbscan_badargs(args):
function test_pickle (line 285) | def test_pickle():
function test_boundaries (line 291) | def test_boundaries():
function test_weighted_dbscan (line 302) | def test_weighted_dbscan():
function test_dbscan_core_samples_toy (line 371) | def test_dbscan_core_samples_toy(algorithm):
function test_dbscan_precomputed_metric_with_degenerate_input_arrays (line 400) | def test_dbscan_precomputed_metric_with_degenerate_input_arrays():
function test_dbscan_precomputed_metric_with_initial_rows_zero (line 412) | def test_dbscan_precomputed_metric_with_initial_rows_zero():
function test_dbscan_params_validation (line 460) | def test_dbscan_params_validation(params, err_type, err_msg):
FILE: sklearn/cluster/tests/test_feature_agglomeration.py
function test_feature_agglomeration (line 11) | def test_feature_agglomeration():
FILE: sklearn/cluster/tests/test_hierarchical.py
function test_linkage_misc (line 53) | def test_linkage_misc():
function test_structured_linkage_tree (line 80) | def test_structured_linkage_tree():
function test_unstructured_linkage_tree (line 103) | def test_unstructured_linkage_tree():
function test_height_linkage_tree (line 127) | def test_height_linkage_tree():
function test_agglomerative_clustering_wrong_arg_memory (line 141) | def test_agglomerative_clustering_wrong_arg_memory():
function test_zero_cosine_linkage_tree (line 153) | def test_zero_cosine_linkage_tree():
function test_agglomerative_clustering_distances (line 165) | def test_agglomerative_clustering_distances(
function test_agglomerative_clustering (line 193) | def test_agglomerative_clustering():
function test_agglomerative_clustering_memory_mapped (line 287) | def test_agglomerative_clustering_memory_mapped():
function test_ward_agglomeration (line 297) | def test_ward_agglomeration():
function test_single_linkage_clustering (line 318) | def test_single_linkage_clustering():
function assess_same_labelling (line 335) | def assess_same_labelling(cut1, cut2):
function test_sparse_scikit_vs_scipy (line 347) | def test_sparse_scikit_vs_scipy():
function test_vector_scikit_single_vs_scipy_single (line 388) | def test_vector_scikit_single_vs_scipy_single(seed):
function test_mst_linkage_core_memory_mapped (line 414) | def test_mst_linkage_core_memory_mapped(metric):
function test_identical_points (line 432) | def test_identical_points():
function test_connectivity_propagation (line 452) | def test_connectivity_propagation():
function test_ward_tree_children_order (line 483) | def test_ward_tree_children_order():
function test_ward_linkage_tree_return_distance (line 503) | def test_ward_linkage_tree_return_distance():
function test_connectivity_fixing_non_lil (line 621) | def test_connectivity_fixing_non_lil():
function test_int_float_dict (line 634) | def test_int_float_dict():
function test_connectivity_callable (line 651) | def test_connectivity_callable():
function test_connectivity_ignores_diagonal (line 664) | def test_connectivity_ignores_diagonal():
function test_compute_full_tree (line 676) | def test_compute_full_tree():
function test_n_components (line 702) | def test_n_components():
function test_agg_n_clusters (line 714) | def test_agg_n_clusters():
function test_affinity_passed_to_fix_connectivity (line 728) | def test_affinity_passed_to_fix_connectivity():
function test_agglomerative_clustering_with_distance_threshold (line 755) | def test_agglomerative_clustering_with_distance_threshold(linkage):
function test_small_distance_threshold (line 793) | def test_small_distance_threshold():
function test_cluster_distances_with_distance_threshold (line 810) | def test_cluster_distances_with_distance_threshold():
function test_agglomerative_clustering_with_distance_threshold_edge_case (line 841) | def test_agglomerative_clustering_with_distance_threshold_edge_case(
function test_dist_threshold_invalid_parameters (line 853) | def test_dist_threshold_invalid_parameters():
function test_invalid_shape_precomputed_dist_matrix (line 868) | def test_invalid_shape_precomputed_dist_matrix():
function test_precomputed_connectivity_affinity_with_2_connected_components (line 877) | def test_precomputed_connectivity_affinity_with_2_connected_components():
FILE: sklearn/cluster/tests/test_k_means.py
function test_kmeans_results (line 57) | def test_kmeans_results(array_constr, algo, dtype):
function test_kmeans_relocated_clusters (line 82) | def test_kmeans_relocated_clusters(array_constr, algo):
function test_relocate_empty_clusters (line 106) | def test_relocate_empty_clusters(array_constr):
function test_kmeans_elkan_results (line 152) | def test_kmeans_elkan_results(distribution, array_constr, tol):
function test_kmeans_convergence (line 176) | def test_kmeans_convergence(algorithm):
function test_minibatch_update_consistency (line 194) | def test_minibatch_update_consistency():
function _check_fitted_model (line 266) | def _check_fitted_model(km):
function test_all_init (line 287) | def test_all_init(Estimator, data, init):
function test_minibatch_kmeans_partial_fit_init (line 301) | def test_minibatch_kmeans_partial_fit_init(init):
function test_fortran_aligned_data (line 314) | def test_fortran_aligned_data(Estimator):
function test_k_means_fit_predict (line 341) | def test_k_means_fit_predict(algo, dtype, constructor, seed, max_iter, t...
function test_minibatch_kmeans_verbose (line 359) | def test_minibatch_kmeans_verbose():
function test_kmeans_verbose (line 372) | def test_kmeans_verbose(algorithm, tol, capsys):
function test_minibatch_kmeans_warning_init_size (line 397) | def test_minibatch_kmeans_warning_init_size():
function test_warning_n_init_precomputed_centers (line 406) | def test_warning_n_init_precomputed_centers(Estimator):
function test_minibatch_sensible_reassign (line 416) | def test_minibatch_sensible_reassign():
function test_minibatch_reassign (line 445) | def test_minibatch_reassign(data):
function test_minibatch_with_many_reassignments (line 498) | def test_minibatch_with_many_reassignments():
function test_minibatch_kmeans_init_size (line 512) | def test_minibatch_kmeans_init_size():
function test_minibatch_declared_convergence (line 531) | def test_minibatch_declared_convergence(capsys, tol, max_no_improvement):
function test_minibatch_iter_steps (line 558) | def test_minibatch_iter_steps():
function test_kmeans_copyx (line 583) | def test_kmeans_copyx():
function test_score_max_iter (line 595) | def test_score_max_iter(Estimator):
function test_predict (line 616) | def test_predict(Estimator, algorithm, init, dtype, array_constr):
function test_dense_sparse (line 642) | def test_dense_sparse(Estimator):
function test_predict_dense_sparse (line 658) | def test_predict_dense_sparse(Estimator, init):
function test_integer_input (line 677) | def test_integer_input(Estimator, array_constr, dtype, init):
function test_transform (line 704) | def test_transform(Estimator):
function test_fit_transform (line 722) | def test_fit_transform(Estimator):
function test_n_init (line 729) | def test_n_init():
function test_k_means_function (line 745) | def test_k_means_function():
function test_float_precision (line 761) | def test_float_precision(Estimator, data):
function test_centers_not_mutated (line 797) | def test_centers_not_mutated(Estimator, dtype):
function test_kmeans_init_fitted_centers (line 811) | def test_kmeans_init_fitted_centers(data):
function test_kmeans_warns_less_centers_than_unique_points (line 820) | def test_kmeans_warns_less_centers_than_unique_points():
function _sort_centers (line 837) | def _sort_centers(centers):
function test_weighted_vs_repeated (line 841) | def test_weighted_vs_repeated():
function test_unit_weights_vs_no_weights (line 865) | def test_unit_weights_vs_no_weights(Estimator, data):
function test_scaled_weights (line 880) | def test_scaled_weights(Estimator, data):
function test_kmeans_elkan_iter_attribute (line 893) | def test_kmeans_elkan_iter_attribute():
function test_kmeans_empty_cluster_relocated (line 903) | def test_kmeans_empty_cluster_relocated(array_constr):
function test_result_equal_in_diff_n_threads (line 918) | def test_result_equal_in_diff_n_threads(Estimator):
function test_minibatch_kmeans_deprecated_attributes (line 932) | def test_minibatch_kmeans_deprecated_attributes(attr):
function test_warning_elkan_1_cluster (line 945) | def test_warning_elkan_1_cluster():
function test_k_means_1_iteration (line 958) | def test_k_means_1_iteration(array_constr, algo):
function test_euclidean_distance (line 987) | def test_euclidean_distance(dtype, squared):
function test_inertia (line 1012) | def test_inertia(dtype):
function test_sample_weight_unchanged (line 1037) | def test_sample_weight_unchanged(Estimator):
function test_wrong_params (line 1080) | def test_wrong_params(Estimator, param, match):
function test_kmeans_wrong_params (line 1093) | def test_kmeans_wrong_params(param, match):
function test_minibatch_kmeans_wrong_params (line 1109) | def test_minibatch_kmeans_wrong_params(param, match):
function test_kmeans_plusplus_wrong_params (line 1131) | def test_kmeans_plusplus_wrong_params(param, match):
function test_kmeans_plusplus_output (line 1138) | def test_kmeans_plusplus_output(data, dtype):
function test_kmeans_plusplus_norms (line 1161) | def test_kmeans_plusplus_norms(x_squared_norms):
function test_kmeans_plusplus_dataorder (line 1168) | def test_kmeans_plusplus_dataorder():
function test_is_same_clustering (line 1179) | def test_is_same_clustering():
FILE: sklearn/cluster/tests/test_mean_shift.py
function test_estimate_bandwidth (line 36) | def test_estimate_bandwidth():
function test_estimate_bandwidth_1sample (line 42) | def test_estimate_bandwidth_1sample():
function test_mean_shift (line 53) | def test_mean_shift(bandwidth, cluster_all, expected, first_cluster_label):
function test_mean_shift_negative_bandwidth (line 69) | def test_mean_shift_negative_bandwidth():
function test_estimate_bandwidth_with_sparse_matrix (line 77) | def test_estimate_bandwidth_with_sparse_matrix():
function test_parallel (line 85) | def test_parallel():
function test_meanshift_predict (line 106) | def test_meanshift_predict():
function test_meanshift_all_orphans (line 114) | def test_meanshift_all_orphans():
function test_unfitted (line 124) | def test_unfitted():
function test_cluster_intensity_tie (line 131) | def test_cluster_intensity_tie():
function test_bin_seeds (line 141) | def test_bin_seeds():
function test_max_iter (line 182) | def test_max_iter(max_iter):
function test_mean_shift_zero_bandwidth (line 194) | def test_mean_shift_zero_bandwidth():
FILE: sklearn/cluster/tests/test_optics.py
function test_extend_downward (line 41) | def test_extend_downward(r_plot, end):
function test_extend_upward (line 60) | def test_extend_upward(r_plot, end):
function test_the_extract_xi_labels (line 79) | def test_the_extract_xi_labels(ordering, clusters, expected):
function test_extract_xi (line 85) | def test_extract_xi():
function test_cluster_hierarchy_ (line 138) | def test_cluster_hierarchy_():
function test_correct_number_of_clusters (line 152) | def test_correct_number_of_clusters():
function test_minimum_number_of_sample_check (line 180) | def test_minimum_number_of_sample_check():
function test_bad_extract (line 193) | def test_bad_extract():
function test_bad_reachability (line 207) | def test_bad_reachability():
function test_nowarn_if_metric_bool_data_bool (line 219) | def test_nowarn_if_metric_bool_data_bool():
function test_warn_if_metric_bool_data_no_bool (line 232) | def test_warn_if_metric_bool_data_no_bool():
function test_nowarn_if_metric_no_bool (line 247) | def test_nowarn_if_metric_no_bool():
function test_close_extract (line 262) | def test_close_extract():
function test_dbscan_optics_parity (line 278) | def test_dbscan_optics_parity(eps, min_samples):
function test_min_samples_edge_case (line 304) | def test_min_samples_edge_case():
function test_min_cluster_size (line 326) | def test_min_cluster_size(min_cluster_size):
function test_min_cluster_size_invalid (line 341) | def test_min_cluster_size_invalid(min_cluster_size):
function test_min_cluster_size_invalid2 (line 347) | def test_min_cluster_size_invalid2():
function test_processing_order (line 353) | def test_processing_order():
function test_compare_to_ELKI (line 363) | def test_compare_to_ELKI():
function test_wrong_cluster_method (line 765) | def test_wrong_cluster_method():
function test_extract_dbscan (line 771) | def test_extract_dbscan():
function test_precomputed_dists (line 786) | def test_precomputed_dists():
FILE: sklearn/cluster/tests/test_spectral.py
function test_spectral_clustering (line 34) | def test_spectral_clustering(eigen_solver, assign_labels):
function test_spectral_unknown_mode (line 67) | def test_spectral_unknown_mode():
function test_spectral_unknown_assign_labels (line 86) | def test_spectral_unknown_assign_labels():
function test_spectral_clustering_sparse (line 106) | def test_spectral_clustering_sparse(assign_labels):
function test_precomputed_nearest_neighbors_filtering (line 128) | def test_precomputed_nearest_neighbors_filtering():
function test_affinities (line 154) | def test_affinities():
function test_cluster_qr (line 201) | def test_cluster_qr():
function test_cluster_qr_permutation_invariance (line 219) | def test_cluster_qr_permutation_invariance():
function test_discretize (line 232) | def test_discretize(n_samples):
function test_spectral_clustering_with_arpack_amg_solvers (line 264) | def test_spectral_clustering_with_arpack_amg_solvers():
function test_n_components (line 300) | def test_n_components():
function test_verbose (line 324) | def test_verbose(assign_labels, capsys):
function test_pairwise_is_deprecated (line 343) | def test_pairwise_is_deprecated(affinity):
function test_spectral_clustering_np_matrix_raises (line 350) | def test_spectral_clustering_np_matrix_raises():
function test_spectral_clustering_not_infinite_loop (line 360) | def test_spectral_clustering_not_infinite_loop(capsys, monkeypatch):
FILE: sklearn/compose/_column_transformer.py
class ColumnTransformer (line 40) | class ColumnTransformer(TransformerMixin, _BaseComposition):
method __init__ (line 198) | def __init__(
method _transformers (line 218) | def _transformers(self):
method _transformers (line 228) | def _transformers(self, value):
method get_params (line 234) | def get_params(self, deep=True):
method set_params (line 254) | def set_params(self, **kwargs):
method _iter (line 274) | def _iter(self, fitted=False, replace_strings=False, column_as_strings...
method _validate_transformers (line 320) | def _validate_transformers(self):
method _validate_column_callables (line 342) | def _validate_column_callables(self, X):
method _validate_remainder (line 357) | def _validate_remainder(self, X):
method named_transformers_ (line 379) | def named_transformers_(self):
method get_feature_names (line 393) | def get_feature_names(self):
method _get_feature_name_out_for_transformer (line 426) | def _get_feature_name_out_for_transformer(
method get_feature_names_out (line 456) | def get_feature_names_out(self, input_features=None):
method _update_fitted_transformers (line 527) | def _update_fitted_transformers(self, transformers):
method _validate_output (line 550) | def _validate_output(self, result):
method _record_output_indices (line 565) | def _record_output_indices(self, Xs):
method _log_message (line 587) | def _log_message(self, name, idx, total):
method _fit_transform (line 592) | def _fit_transform(self, X, y, func, fitted=False, column_as_strings=F...
method fit (line 623) | def fit(self, X, y=None):
method fit_transform (line 645) | def fit_transform(self, X, y=None):
method transform (line 701) | def transform(self, X):
method _hstack (line 763) | def _hstack(self, Xs):
method _sk_visual_block_ (line 793) | def _sk_visual_block_(self):
function _check_X (line 816) | def _check_X(X):
function _is_empty_column_selection (line 823) | def _is_empty_column_selection(column):
function _get_transformer_list (line 841) | def _get_transformer_list(estimators):
function make_column_transformer (line 853) | def make_column_transformer(
class make_column_selector (line 966) | class make_column_selector:
method __init__ (line 1021) | def __init__(self, pattern=None, *, dtype_include=None, dtype_exclude=...
method __call__ (line 1026) | def __call__(self, df):
FILE: sklearn/compose/_target.py
class TransformedTargetRegressor (line 19) | class TransformedTargetRegressor(RegressorMixin, BaseEstimator):
method __init__ (line 130) | def __init__(
method _fit_transformer (line 145) | def _fit_transformer(self, y):
method fit (line 189) | def fit(self, X, y, **fit_params):
method predict (line 254) | def predict(self, X, **predict_params):
method _more_tags (line 289) | def _more_tags(self):
method n_features_in_ (line 302) | def n_features_in_(self):
FILE: sklearn/compose/tests/test_column_transformer.py
class Trans (line 28) | class Trans(BaseEstimator):
method fit (line 29) | def fit(self, X, y=None):
method transform (line 32) | def transform(self, X, y=None):
class DoubleTrans (line 42) | class DoubleTrans(BaseEstimator):
method fit (line 43) | def fit(self, X, y=None):
method transform (line 46) | def transform(self, X):
class SparseMatrixTrans (line 50) | class SparseMatrixTrans(BaseEstimator):
method fit (line 51) | def fit(self, X, y=None):
method transform (line 54) | def transform(self, X, y=None):
class TransNo2D (line 59) | class TransNo2D(BaseEstimator):
method fit (line 60) | def fit(self, X, y=None):
method transform (line 63) | def transform(self, X, y=None):
class TransRaise (line 67) | class TransRaise(BaseEstimator):
method fit (line 68) | def fit(self, X, y=None):
method transform (line 71) | def transform(self, X, y=None):
function test_column_transformer (line 75) | def test_column_transformer():
function test_column_transformer_dataframe (line 141) | def test_column_transformer_dataframe():
function test_column_transformer_empty_columns (line 272) | def test_column_transformer_empty_columns(pandas, column_selection, call...
function test_column_transformer_output_indices (line 319) | def test_column_transformer_output_indices():
function test_column_transformer_output_indices_df (line 362) | def test_column_transformer_output_indices_df():
function test_column_transformer_sparse_array (line 393) | def test_column_transformer_sparse_array():
function test_column_transformer_list (line 416) | def test_column_transformer_list():
function test_column_transformer_sparse_stacking (line 436) | def test_column_transformer_sparse_stacking():
function test_column_transformer_mixed_cols_sparse (line 461) | def test_column_transformer_mixed_cols_sparse():
function test_column_transformer_sparse_threshold (line 483) | def test_column_transformer_sparse_threshold():
function test_column_transformer_error_msg_1D (line 535) | def test_column_transformer_error_msg_1D():
function test_2D_transformer_output (line 552) | def test_2D_transformer_output():
function test_2D_transformer_output_pandas (line 566) | def test_2D_transformer_output_pandas():
function test_column_transformer_invalid_columns (line 583) | def test_column_transformer_invalid_columns(remainder):
function test_column_transformer_invalid_transformer (line 618) | def test_column_transformer_invalid_transformer():
function test_make_column_transformer (line 633) | def test_make_column_transformer():
function test_make_column_transformer_pandas (line 643) | def test_make_column_transformer_pandas():
function test_make_column_transformer_kwargs (line 653) | def test_make_column_transformer_kwargs():
function test_make_column_transformer_remainder_transformer (line 683) | def test_make_column_transformer_remainder_transformer():
function test_column_transformer_get_set_params (line 693) | def test_column_transformer_get_set_params():
function test_column_transformer_named_estimators (line 740) | def test_column_transformer_named_estimators():
function test_column_transformer_cloning (line 760) | def test_column_transformer_cloning():
function test_column_transformer_get_feature_names (line 777) | def test_column_transformer_get_feature_names(get_names):
function test_column_transformer_get_feature_names_pipeline (line 808) | def test_column_transformer_get_feature_names_pipeline(X, keys):
function test_column_transformer_get_feature_names_dataframe (line 854) | def test_column_transformer_get_feature_names_dataframe():
function test_column_transformer_special_strings (line 904) | def test_column_transformer_special_strings():
function test_column_transformer_remainder (line 941) | def test_column_transformer_remainder():
function test_column_transformer_remainder_numpy (line 1001) | def test_column_transformer_remainder_numpy(key):
function test_column_transformer_remainder_pandas (line 1029) | def test_column_transformer_remainder_pandas(key):
function test_column_transformer_remainder_transformer (line 1051) | def test_column_transformer_remainder_transformer(key):
function test_column_transformer_no_remaining_remainder_transformer (line 1068) | def test_column_transformer_no_remaining_remainder_transformer():
function test_column_transformer_drops_all_remainder_transformer (line 1079) | def test_column_transformer_drops_all_remainder_transformer():
function test_column_transformer_sparse_remainder_transformer (line 1095) | def test_column_transformer_sparse_remainder_transformer():
function test_column_transformer_drop_all_sparse_remainder_transformer (line 1116) | def test_column_transformer_drop_all_sparse_remainder_transformer():
function test_column_transformer_get_set_params_with_remainder (line 1134) | def test_column_transformer_get_set_params_with_remainder():
function test_column_transformer_no_estimators (line 1178) | def test_column_transformer_no_estimators():
function test_column_transformer_verbose (line 1261) | def test_column_transformer_verbose(est, pattern, method, capsys):
function test_column_transformer_no_estimators_set_params (line 1274) | def test_column_transformer_no_estimators_set_params():
function test_column_transformer_callable_specifier (line 1279) | def test_column_transformer_callable_specifier():
function test_column_transformer_callable_specifier_dataframe (line 1295) | def test_column_transformer_callable_specifier_dataframe():
function test_column_transformer_negative_column_indexes (line 1315) | def test_column_transformer_negative_column_indexes():
function test_column_transformer_mask_indexing (line 1328) | def test_column_transformer_mask_indexing(array_type):
function test_n_features_in (line 1341) | def test_n_features_in():
function test_make_column_selector_with_select_dtypes (line 1370) | def test_make_column_selector_with_select_dtypes(cols, pattern, include,...
function test_column_transformer_with_make_column_selector (line 1389) | def test_column_transformer_with_make_column_selector():
function test_make_column_selector_error (line 1420) | def test_make_column_selector_error():
function test_make_column_selector_pickle (line 1428) | def test_make_column_selector_pickle():
function test_feature_names_empty_columns (line 1460) | def test_feature_names_empty_columns(empty_col, get_names, expected_names):
function test_feature_names_out_pandas (line 1487) | def test_feature_names_out_pandas(selector):
function test_feature_names_out_non_pandas (line 1500) | def test_feature_names_out_non_pandas(selector):
function test_sk_visual_block_remainder (line 1510) | def test_sk_visual_block_remainder(remainder):
function test_sk_visual_block_remainder_drop (line 1522) | def test_sk_visual_block_remainder_drop():
function test_sk_visual_block_remainder_fitted_pandas (line 1533) | def test_sk_visual_block_remainder_fitted_pandas(remainder):
function test_sk_visual_block_remainder_fitted_numpy (line 1556) | def test_sk_visual_block_remainder_fitted_numpy(remainder):
function test_column_transformers_get_feature_names_deprecated (line 1571) | def test_column_transformers_get_feature_names_deprecated():
function test_column_transformer_reordered_column_names_remainder (line 1584) | def test_column_transformer_reordered_column_names_remainder(
function test_feature_name_validation_missing_columns_drop_passthough (line 1620) | def test_feature_name_validation_missing_columns_drop_passthough():
function test_get_feature_names_empty_selection (line 1657) | def test_get_feature_names_empty_selection(selector):
function test_feature_names_in_ (line 1666) | def test_feature_names_in_():
class TransWithNames (line 1686) | class TransWithNames(Trans):
method __init__ (line 1687) | def __init__(self, feature_names_out=None):
method get_feature_names_out (line 1690) | def get_feature_names_out(self, input_features=None):
function test_verbose_feature_names_out_true (line 1763) | def test_verbose_feature_names_out_true(transformers, remainder, expecte...
function test_verbose_feature_names_out_false (line 1838) | def test_verbose_feature_names_out_false(transformers, remainder, expect...
function test_verbose_feature_names_out_false_errors (line 1926) | def test_verbose_feature_names_out_false_errors(
FILE: sklearn/compose/tests/test_target.py
function test_transform_target_regressor_error (line 27) | def test_transform_target_regressor_error():
function test_transform_target_regressor_invertible (line 60) | def test_transform_target_regressor_invertible():
function _check_standard_scaled (line 83) | def _check_standard_scaled(y, y_pred):
function _check_shifted_by_one (line 89) | def _check_shifted_by_one(y, y_pred):
function test_transform_target_regressor_functions (line 93) | def test_transform_target_regressor_functions():
function test_transform_target_regressor_functions_multioutput (line 112) | def test_transform_target_regressor_functions_multioutput():
function test_transform_target_regressor_1d_transformer (line 133) | def test_transform_target_regressor_1d_transformer(X, y):
function test_transform_target_regressor_2d_transformer (line 164) | def test_transform_target_regressor_2d_transformer(X, y):
function test_transform_target_regressor_2d_transformer_multioutput (line 198) | def test_transform_target_regressor_2d_transformer_multioutput():
function test_transform_target_regressor_3d_target (line 224) | def test_transform_target_regressor_3d_target():
function test_transform_target_regressor_multi_to_single (line 245) | def test_transform_target_regressor_multi_to_single():
class DummyCheckerArrayTransformer (line 277) | class DummyCheckerArrayTransformer(TransformerMixin, BaseEstimator):
method fit (line 278) | def fit(self, X, y=None):
method transform (line 282) | def transform(self, X):
method inverse_transform (line 286) | def inverse_transform(self, X):
class DummyCheckerListRegressor (line 291) | class DummyCheckerListRegressor(DummyRegressor):
method fit (line 292) | def fit(self, X, y, sample_weight=None):
method predict (line 296) | def predict(self, X):
function test_transform_target_regressor_ensure_y_array (line 301) | def test_transform_target_regressor_ensure_y_array():
class DummyTransformer (line 319) | class DummyTransformer(TransformerMixin, BaseEstimator):
method __init__ (line 322) | def __init__(self, fit_counter=0):
method fit (line 325) | def fit(self, X, y=None):
method transform (line 329) | def transform(self, X):
method inverse_transform (line 332) | def inverse_transform(self, X):
function test_transform_target_regressor_count_fit (line 337) | def test_transform_target_regressor_count_fit(check_inverse):
class DummyRegressorWithExtraFitParams (line 348) | class DummyRegressorWithExtraFitParams(DummyRegressor):
method fit (line 349) | def fit(self, X, y, sample_weight=None, check_input=True):
function test_transform_target_regressor_pass_fit_parameters (line 356) | def test_transform_target_regressor_pass_fit_parameters():
function test_transform_target_regressor_route_pipeline (line 366) | def test_transform_target_regressor_route_pipeline():
class DummyRegressorWithExtraPredictParams (line 380) | class DummyRegressorWithExtraPredictParams(DummyRegressor):
method predict (line 381) | def predict(self, X, check_input=True):
function test_transform_target_regressor_pass_extra_predict_parameters (line 389) | def test_transform_target_regressor_pass_extra_predict_parameters():
FILE: sklearn/conftest.py
function _fetch_fixture (line 41) | def _fetch_fixture(f):
function pytest_collection_modifyitems (line 68) | def pytest_collection_modifyitems(config, items):
function pyplot (line 186) | def pyplot():
function pytest_runtest_setup (line 204) | def pytest_runtest_setup(item):
function pytest_configure (line 225) | def pytest_configure(config):
FILE: sklearn/covariance/_elliptic_envelope.py
class EllipticEnvelope (line 12) | class EllipticEnvelope(OutlierMixin, MinCovDet):
method __init__ (line 141) | def __init__(
method fit (line 158) | def fit(self, X, y=None):
method decision_function (line 184) | def decision_function(self, X):
method score_samples (line 204) | def score_samples(self, X):
method predict (line 220) | def predict(self, X):
method score (line 240) | def score(self, X, y, sample_weight=None):
FILE: sklearn/covariance/_empirical_covariance.py
function log_likelihood (line 24) | def log_likelihood(emp_cov, precision):
function empirical_covariance (line 51) | def empirical_covariance(X, *, assume_centered=False):
class EmpiricalCovariance (line 100) | class EmpiricalCovariance(BaseEstimator):
method __init__ (line 170) | def __init__(self, *, store_precision=True, assume_centered=False):
method _set_covariance (line 174) | def _set_covariance(self, covariance):
method get_precision (line 195) | def get_precision(self):
method fit (line 209) | def fit(self, X, y=None):
method score (line 236) | def score(self, X_test, y=None):
method error_norm (line 267) | def error_norm(self, comp_cov, norm="frobenius", scaling=True, squared...
method mahalanobis (line 318) | def mahalanobis(self, X):
FILE: sklearn/covariance/_graph_lasso.py
function _objective (line 33) | def _objective(mle, precision_, alpha):
function _dual_gap (line 46) | def _dual_gap(emp_cov, precision_, alpha):
function alpha_max (line 58) | def alpha_max(emp_cov):
class _DictWithDeprecatedKeys (line 77) | class _DictWithDeprecatedKeys(dict):
method __init__ (line 82) | def __init__(self, **kwargs):
method __getitem__ (line 86) | def __getitem__(self, key):
method _set_deprecated (line 95) | def _set_deprecated(self, value, *, new_key, deprecated_key):
function graphical_lasso (line 101) | def graphical_lasso(
class GraphicalLasso (line 336) | class GraphicalLasso(EmpiricalCovariance):
method __init__ (line 432) | def __init__(
method fit (line 451) | def fit(self, X, y=None):
function graphical_lasso_path (line 489) | def graphical_lasso_path(
class GraphicalLassoCV (line 608) | class GraphicalLassoCV(GraphicalLasso):
method __init__ (line 813) | def __init__(
method fit (line 840) | def fit(self, X, y=None):
method grid_scores_ (line 1018) | def grid_scores_(self):
method cv_alphas_ (line 1038) | def cv_alphas_(self):
FILE: sklearn/covariance/_robust_covariance.py
function c_step (line 29) | def c_step(
function _c_step (line 107) | def _c_step(
function select_candidates (line 208) | def select_candidates(
function fast_mcd (line 354) | def fast_mcd(
class MinCovDet (line 576) | class MinCovDet(EmpiricalCovariance):
method __init__ (line 703) | def __init__(
method fit (line 716) | def fit(self, X, y=None):
method correct_covariance (line 769) | def correct_covariance(self, data):
method reweight_covariance (line 809) | def reweight_covariance(self, data):
FILE: sklearn/covariance/_shrunk_covariance.py
function shrunk_covariance (line 27) | def shrunk_covariance(emp_cov, shrinkage=0.1):
class ShrunkCovariance (line 64) | class ShrunkCovariance(EmpiricalCovariance):
method __init__ (line 148) | def __init__(self, *, store_precision=True, assume_centered=False, shr...
method fit (line 154) | def fit(self, X, y=None):
function ledoit_wolf_shrinkage (line 188) | def ledoit_wolf_shrinkage(X, assume_centered=False, block_size=1000):
function ledoit_wolf (line 283) | def ledoit_wolf(X, *, assume_centered=False, block_size=1000):
class LedoitWolf (line 347) | class LedoitWolf(EmpiricalCovariance):
method __init__ (line 448) | def __init__(self, *, store_precision=True, assume_centered=False, blo...
method fit (line 454) | def fit(self, X, y=None):
function oas (line 488) | def oas(X, *, assume_centered=False):
class OAS (line 553) | class OAS(EmpiricalCovariance):
method fit (line 656) | def fit(self, X, y=None):
FILE: sklearn/covariance/tests/test_covariance.py
function test_covariance (line 32) | def test_covariance():
function test_shrunk_covariance (line 79) | def test_shrunk_covariance():
function test_ledoit_wolf (line 112) | def test_ledoit_wolf():
function _naive_ledoit_wolf_shrinkage (line 198) | def _naive_ledoit_wolf_shrinkage(X):
function test_ledoit_wolf_small (line 223) | def test_ledoit_wolf_small():
function test_ledoit_wolf_large (line 233) | def test_ledoit_wolf_large():
function test_ledoit_wolf_empty_array (line 251) | def test_ledoit_wolf_empty_array(ledoit_wolf_fitting_function):
function test_oas (line 258) | def test_oas():
function test_EmpiricalCovariance_validates_mahalanobis (line 331) | def test_EmpiricalCovariance_validates_mahalanobis():
FILE: sklearn/covariance/tests/test_elliptic_envelope.py
function test_elliptic_envelope (line 15) | def test_elliptic_envelope():
function test_score_samples (line 36) | def test_score_samples():
FILE: sklearn/covariance/tests/test_graphical_lasso.py
function test_graphical_lasso (line 25) | def test_graphical_lasso(random_state=0):
function test_graphical_lasso_iris (line 68) | def test_graphical_lasso_iris():
function test_graph_lasso_2D (line 95) | def test_graph_lasso_2D():
function test_graphical_lasso_iris_singular (line 109) | def test_graphical_lasso_iris_singular():
function test_graphical_lasso_cv (line 141) | def test_graphical_lasso_cv(random_state=1):
function test_graphical_lasso_cv_grid_scores_and_cv_alphas_deprecated (line 163) | def test_graphical_lasso_cv_grid_scores_and_cv_alphas_deprecated():
function test_graphical_lasso_cv_scores (line 202) | def test_graphical_lasso_cv_scores(suffix):
function test_graphical_lasso_cv_scores_deprecated (line 239) | def test_graphical_lasso_cv_scores_deprecated():
FILE: sklearn/covariance/tests/test_robust_covariance.py
function test_mcd (line 23) | def test_mcd():
function test_fast_mcd_on_invalid_input (line 43) | def test_fast_mcd_on_invalid_input():
function test_mcd_class_on_invalid_input (line 50) | def test_mcd_class_on_invalid_input():
function launch_mcd_on_dataset (line 58) | def launch_mcd_on_dataset(
function test_mcd_issue1127 (line 86) | def test_mcd_issue1127():
function test_mcd_issue3367 (line 95) | def test_mcd_issue3367():
function test_mcd_support_covariance_is_zero (line 127) | def test_mcd_support_covariance_is_zero():
function test_mcd_increasing_det_warning (line 143) | def test_mcd_increasing_det_warning():
FILE: sklearn/cross_decomposition/_pls.py
function _pinv2_old (line 35) | def _pinv2_old(a):
function _get_first_singular_vectors_power_method (line 53) | def _get_first_singular_vectors_power_method(
function _get_first_singular_vectors_svd (line 112) | def _get_first_singular_vectors_svd(X, Y):
function _center_scale_xy (line 122) | def _center_scale_xy(X, Y, scale=True):
function _svd_flip_1d (line 148) | def _svd_flip_1d(u, v):
class _PLS (line 158) | class _PLS(
method __init__ (line 171) | def __init__(
method fit (line 192) | def fit(self, X, Y):
method transform (line 366) | def transform(self, X, Y=None, copy=True):
method inverse_transform (line 405) | def inverse_transform(self, X, Y=None):
method predict (line 449) | def predict(self, X, copy=True):
method fit_transform (line 479) | def fit_transform(self, X, y=None):
method norm_y_weights (line 505) | def norm_y_weights(self):
method x_mean_ (line 513) | def x_mean_(self):
method y_mean_ (line 521) | def y_mean_(self):
method x_std_ (line 529) | def x_std_(self):
method y_std_ (line 537) | def y_std_(self):
method x_scores_ (line 541) | def x_scores_(self):
method y_scores_ (line 555) | def y_scores_(self):
method _more_tags (line 567) | def _more_tags(self):
class PLSRegression (line 571) | class PLSRegression(_PLS):
method __init__ (line 670) | def __init__(
class PLSCanonical (line 685) | class PLSCanonical(_PLS):
method __init__ (line 799) | def __init__(
class CCA (line 821) | class CCA(_PLS):
method __init__ (line 919) | def __init__(
class PLSSVD (line 934) | class PLSSVD(TransformerMixin, BaseEstimator):
method __init__ (line 1018) | def __init__(self, n_components=2, *, scale=True, copy=True):
method fit (line 1023) | def fit(self, X, Y):
method x_scores_ (line 1091) | def x_scores_(self):
method y_scores_ (line 1101) | def y_scores_(self):
method x_mean_ (line 1109) | def x_mean_(self):
method y_mean_ (line 1117) | def y_mean_(self):
method x_std_ (line 1125) | def x_std_(self):
method y_std_ (line 1133) | def y_std_(self):
method transform (line 1136) | def transform(self, X, Y=None):
method fit_transform (line 1168) | def fit_transform(self, X, y=None):
FILE: sklearn/cross_decomposition/tests/test_pls.py
function assert_matrix_orthogonal (line 21) | def assert_matrix_orthogonal(M):
function test_pls_canonical_basics (line 26) | def test_pls_canonical_basics():
function test_sanity_check_pls_regression (line 66) | def test_sanity_check_pls_regression():
function test_sanity_check_pls_regression_constant_column_Y (line 124) | def test_sanity_check_pls_regression_constant_column_Y():
function test_sanity_check_pls_canonical (line 174) | def test_sanity_check_pls_canonical():
function test_sanity_check_pls_canonical_random (line 236) | def test_sanity_check_pls_canonical_random():
function test_convergence_fail (line 340) | def test_convergence_fail():
function test_attibutes_shapes (line 352) | def test_attibutes_shapes(Est):
function test_univariate_equivalence (line 371) | def test_univariate_equivalence(Est):
function test_copy (line 386) | def test_copy(Est):
function _generate_test_scale_and_stability_datasets (line 424) | def _generate_test_scale_and_stability_datasets():
function test_scale_and_stability (line 458) | def test_scale_and_stability(Est, X, Y):
function test_n_components_bounds (line 473) | def test_n_components_bounds(Est, n_components):
function test_n_components_bounds_pls_regression (line 487) | def test_n_components_bounds_pls_regression(n_components):
function test_scores_deprecations (line 501) | def test_scores_deprecations(Est):
function test_norm_y_weights_deprecation (line 517) | def test_norm_y_weights_deprecation(Est):
function test_mean_and_std_deprecation (line 529) | def test_mean_and_std_deprecation(Estimator, attribute):
function test_singular_value_helpers (line 540) | def test_singular_value_helpers(n_samples, n_features, seed):
function test_one_component_equivalence (line 554) | def test_one_component_equivalence():
function test_svd_flip_1d (line 566) | def test_svd_flip_1d():
function test_loadings_converges (line 581) | def test_loadings_converges():
function test_pls_constant_y (line 597) | def test_pls_constant_y():
FILE: sklearn/datasets/_base.py
function get_data_home (line 34) | def get_data_home(data_home=None) -> str:
function clear_data_home (line 62) | def clear_data_home(data_home=None):
function _convert_data_dataframe (line 75) | def _convert_data_dataframe(
function load_files (line 93) | def load_files(
function load_csv_data (line 247) | def load_csv_data(
function load_gzip_compressed_csv_data (line 314) | def load_gzip_compressed_csv_data(
function load_descr (line 378) | def load_descr(descr_file_name, *, descr_module=DESCR_MODULE):
function load_wine (line 403) | def load_wine(*, return_X_y=False, as_frame=False):
function load_iris (line 520) | def load_iris(*, return_X_y=False, as_frame=False):
function load_breast_cancer (line 639) | def load_breast_cancer(*, return_X_y=False, as_frame=False):
function load_digits (line 783) | def load_digits(*, n_class=10, return_X_y=False, as_frame=False):
function load_diabetes (line 911) | def load_diabetes(*, return_X_y=False, as_frame=False):
function load_linnerud (line 1010) | def load_linnerud(*, return_X_y=False, as_frame=False):
function load_boston (line 1152) | def load_boston(*, return_X_y=False):
function load_sample_images (line 1307) | def load_sample_images():
function load_sample_image (line 1356) | def load_sample_image(image_name):
function _pkl_filepath (line 1397) | def _pkl_filepath(*args, **kwargs):
function _sha256 (line 1414) | def _sha256(path):
function _fetch_remote (line 1427) | def _fetch_remote(remote, dirname=None):
FILE: sklearn/datasets/_california_housing.py
function fetch_california_housing (line 53) | def fetch_california_housing(
FILE: sklearn/datasets/_covtype.py
function fetch_covtype (line 64) | def fetch_covtype(
FILE: sklearn/datasets/_kddcup99.py
function fetch_kddcup99 (line 49) | def fetch_kddcup99(
function _fetch_brute_kddcup99 (line 227) | def _fetch_brute_kddcup99(data_home=None, download_if_missing=True, perc...
function _mkdirp (line 375) | def _mkdirp(d):
FILE: sklearn/datasets/_lfw.py
function _check_fetch_lfw (line 76) | def _check_fetch_lfw(data_home=None, funneled=True, download_if_missing=...
function _load_imgs (line 119) | def _load_imgs(file_paths, slice_, color, resize):
function _fetch_lfw_people (line 182) | def _fetch_lfw_people(
function fetch_lfw_people (line 225) | def fetch_lfw_people(
function _fetch_lfw_pairs (line 353) | def _fetch_lfw_pairs(
function fetch_lfw_pairs (line 405) | def fetch_lfw_pairs(
FILE: sklearn/datasets/_olivetti_faces.py
function fetch_olivetti_faces (line 39) | def fetch_olivetti_faces(
FILE: sklearn/datasets/_openml.py
function _get_local_path (line 44) | def _get_local_path(openml_path: str, data_home: str) -> str:
function _retry_with_clean_cache (line 48) | def _retry_with_clean_cache(openml_path: str, data_home: Optional[str]) ...
function _open_openml_url (line 75) | def _open_openml_url(openml_path: str, data_home: Optional[str]):
class OpenMLError (line 134) | class OpenMLError(ValueError):
function _get_json_content_from_openml_api (line 140) | def _get_json_content_from_openml_api(
function _split_sparse_columns (line 183) | def _split_sparse_columns(
function _sparse_data_to_array (line 219) | def _sparse_data_to_array(
function _convert_arff_data (line 237) | def _convert_arff_data(
function _feature_to_dtype (line 298) | def _feature_to_dtype(feature: Dict[str, str]):
function _convert_arff_data_dataframe (line 316) | def _convert_arff_data_dataframe(
function _get_data_info_by_name (line 375) | def _get_data_info_by_name(
function _get_data_description_by_id (line 442) | def _get_data_description_by_id(
function _get_data_features (line 454) | def _get_data_features(data_id: int, data_home: Optional[str]) -> Openml...
function _get_data_qualities (line 465) | def _get_data_qualities(data_id: int, data_home: Optional[str]) -> Openm...
function _get_num_samples (line 478) | def _get_num_samples(data_qualities: OpenmlQualitiesType) -> int:
function _load_arff_response (line 499) | def _load_arff_response(
function _download_data_to_bunch (line 544) | def _download_data_to_bunch(
function _verify_target_data_type (line 672) | def _verify_target_data_type(features_dict, target_columns):
function _valid_data_column_names (line 699) | def _valid_data_column_names(features_list, target_columns):
function fetch_openml (line 715) | def fetch_openml(
FILE: sklearn/datasets/_rcv1.py
function fetch_rcv1 (line 79) | def fetch_rcv1(
function _inverse_permutation (line 282) | def _inverse_permutation(p):
function _find_permutation (line 291) | def _find_permutation(a, b):
FILE: sklearn/datasets/_samples_generator.py
function _generate_hypercube (line 23) | def _generate_hypercube(samples, dimensions, rng):
function make_classification (line 39) | def make_classification(
function make_multilabel_classification (line 290) | def make_multilabel_classification(
function make_hastie_10_2 (line 458) | def make_hastie_10_2(n_samples=12000, *, random_state=None):
function make_regression (line 506) | def make_regression(
function make_circles (line 640) | def make_circles(
function make_moons (line 723) | def make_moons(n_samples=100, *, shuffle=True, noise=None, random_state=...
function make_blobs (line 792) | def make_blobs(
function make_friedman1 (line 964) | def make_friedman1(n_samples=100, n_features=10, *, noise=0.0, random_st...
function make_friedman2 (line 1031) | def make_friedman2(n_samples=100, *, noise=0.0, random_state=None):
function make_friedman3 (line 1096) | def make_friedman3(n_samples=100, *, noise=0.0, random_state=None):
function make_low_rank_matrix (line 1161) | def make_low_rank_matrix(
function make_sparse_coded_signal (line 1237) | def make_sparse_coded_signal(
function make_sparse_uncorrelated (line 1300) | def make_sparse_uncorrelated(n_samples=100, n_features=10, *, random_sta...
function make_spd_matrix (line 1351) | def make_spd_matrix(n_dim, *, random_state=None):
function make_sparse_spd_matrix (line 1384) | def make_sparse_spd_matrix(
function make_swiss_roll (line 1464) | def make_swiss_roll(n_samples=100, *, noise=0.0, random_state=None, hole...
function make_s_curve (line 1529) | def make_s_curve(n_samples=100, *, noise=0.0, random_state=None):
function make_gaussian_quantiles (line 1571) | def make_gaussian_quantiles(
function _shuffle (line 1667) | def _shuffle(data, random_state=None):
function make_biclusters (line 1676) | def make_biclusters(
function make_checkerboard (line 1773) | def make_checkerboard(
FILE: sklearn/datasets/_species_distributions.py
function _load_coverage (line 77) | def _load_coverage(F, header_length=6, dtype=np.int16):
function _load_csv (line 93) | def _load_csv(F):
function construct_grids (line 113) | def construct_grids(batch):
function fetch_species_distributions (line 140) | def fetch_species_distributions(*, data_home=None, download_if_missing=T...
FILE: sklearn/datasets/_svmlight_format_io.py
function _load_svmlight_file (line 33) | def _load_svmlight_file(*args, **kwargs):
function load_svmlight_file (line 42) | def load_svmlight_file(
function _gen_open (line 177) | def _gen_open(f):
function _open_and_load (line 196) | def _open_and_load(f, dtype, multilabel, zero_based, query_id, offset=0,...
function load_svmlight_files (line 219) | def load_svmlight_files(
function _dump_svmlight (line 366) | def _dump_svmlight(X, y, f, multilabel, one_based, comment, query_id):
function dump_svmlight_file (line 427) | def dump_svmlight_file(
FILE: sklearn/datasets/_twenty_newsgroups.py
function _download_20newsgroups (line 65) | def _download_20newsgroups(target_dir, cache_path):
function strip_newsgroup_header (line 93) | def strip_newsgroup_header(text):
function strip_newsgroup_quoting (line 112) | def strip_newsgroup_quoting(text):
function strip_newsgroup_footer (line 127) | def strip_newsgroup_footer(text):
function fetch_20newsgroups (line 152) | def fetch_20newsgroups(
function fetch_20newsgroups_vectorized (line 334) | def fetch_20newsgroups_vectorized(
FILE: sklearn/datasets/setup.py
function configuration (line 6) | def configuration(parent_package="", top_path=None):
FILE: sklearn/datasets/tests/conftest.py
function hide_available_pandas (line 8) | def hide_available_pandas(monkeypatch):
FILE: sklearn/datasets/tests/test_20news.py
function test_20news (line 19) | def test_20news(fetch_20newsgroups_fxt):
function test_20news_length_consistency (line 51) | def test_20news_length_consistency(fetch_20newsgroups_fxt):
function test_20news_vectorized (line 63) | def test_20news_vectorized(fetch_20newsgroups_vectorized_fxt):
function test_20news_normalization (line 93) | def test_20news_normalization(fetch_20newsgroups_vectorized_fxt):
function test_20news_as_frame (line 103) | def test_20news_as_frame(fetch_20newsgroups_vectorized_fxt):
function test_as_frame_no_pandas (line 128) | def test_as_frame_no_pandas(fetch_20newsgroups_vectorized_fxt, hide_avai...
function test_outdated_pickle (line 132) | def test_outdated_pickle(fetch_20newsgroups_vectorized_fxt):
FILE: sklearn/datasets/tests/test_base.py
function _remove_dir (line 38) | def _remove_dir(path):
function data_home (line 44) | def data_home(tmpdir_factory):
function load_files_root (line 51) | def load_files_root(tmpdir_factory):
function test_category_dir_1 (line 58) | def test_category_dir_1(load_files_root):
function test_category_dir_2 (line 68) | def test_category_dir_2(load_files_root):
function test_data_home (line 74) | def test_data_home(data_home):
function test_default_empty_load_files (line 89) | def test_default_empty_load_files(load_files_root):
function test_default_load_files (line 96) | def test_default_load_files(test_category_dir_1, test_category_dir_2, lo...
function test_load_files_w_categories_desc_and_encoding (line 106) | def test_load_files_w_categories_desc_and_encoding(
function test_load_files_wo_load_content (line 121) | def test_load_files_wo_load_content(
function test_load_csv_data (line 139) | def test_load_csv_data(
function test_load_csv_data_with_descr (line 149) | def test_load_csv_data_with_descr():
function test_load_gzip_compressed_csv_data (line 175) | def test_load_gzip_compressed_csv_data(filename, kwargs, expected_shape):
function test_load_gzip_compressed_csv_data_with_descr (line 180) | def test_load_gzip_compressed_csv_data_with_descr():
function test_load_sample_images (line 194) | def test_load_sample_images():
function test_load_sample_image (line 210) | def test_load_sample_image():
function test_load_missing_sample_image_error (line 219) | def test_load_missing_sample_image_error():
function test_loader (line 248) | def test_loader(loader_func, data_shape, target_shape, n_target, has_des...
function test_toy_dataset_frame_dtype (line 281) | def test_toy_dataset_frame_dtype(loader_func, data_dtype, target_dtype):
function test_loads_dumps_bunch (line 291) | def test_loads_dumps_bunch():
function test_bunch_pickle_generated_with_0_16_and_read_with_0_17 (line 298) | def test_bunch_pickle_generated_with_0_16_and_read_with_0_17():
function test_bunch_dir (line 319) | def test_bunch_dir():
function test_load_boston_warning (line 326) | def test_load_boston_warning():
function test_load_boston_alternative (line 334) | def test_load_boston_alternative():
FILE: sklearn/datasets/tests/test_california_housing.py
function test_fetch (line 10) | def test_fetch(fetch_california_housing_fxt):
function test_fetch_asframe (line 21) | def test_fetch_asframe(fetch_california_housing_fxt):
function test_pandas_dependency_message (line 31) | def test_pandas_dependency_message(fetch_california_housing_fxt, hide_av...
FILE: sklearn/datasets/tests/test_common.py
function is_pillow_installed (line 11) | def is_pillow_installed():
function check_pandas_dependency_message (line 40) | def check_pandas_dependency_message(fetch_func):
function check_return_X_y (line 54) | def check_return_X_y(bunch, dataset_func):
function check_as_frame (line 61) | def check_as_frame(
function _skip_network_tests (line 89) | def _skip_network_tests():
function _generate_func_supporting_param (line 93) | def _generate_func_supporting_param(param, dataset_type=("load", "fetch")):
function test_common_check_return_X_y (line 119) | def test_common_check_return_X_y(name, dataset_func):
function test_common_check_as_frame (line 127) | def test_common_check_as_frame(name, dataset_func):
function test_common_check_pandas_dependency (line 135) | def test_common_check_pandas_dependency(name, dataset_func):
FILE: sklearn/datasets/tests/test_covtype.py
function test_fetch (line 9) | def test_fetch(fetch_covtype_fxt):
function test_fetch_asframe (line 32) | def test_fetch_asframe(fetch_covtype_fxt):
function test_pandas_dependency_message (line 49) | def test_pandas_dependency_message(fetch_covtype_fxt, hide_available_pan...
FILE: sklearn/datasets/tests/test_kddcup99.py
function test_fetch_kddcup99_percent10 (line 28) | def test_fetch_kddcup99_percent10(
function test_fetch_kddcup99_return_X_y (line 39) | def test_fetch_kddcup99_return_X_y(fetch_kddcup99_fxt):
function test_fetch_kddcup99_as_frame (line 45) | def test_fetch_kddcup99_as_frame(fetch_kddcup99_fxt):
function test_fetch_kddcup99_shuffle (line 50) | def test_fetch_kddcup99_shuffle(fetch_kddcup99_fxt):
function test_pandas_dependency_message (line 67) | def test_pandas_dependency_message(fetch_kddcup99_fxt, hide_available_pa...
function test_corrupted_file_error_message (line 71) | def test_corrupted_file_error_message(fetch_kddcup99_fxt, tmp_path):
FILE: sklearn/datasets/tests/test_lfw.py
function setup_module (line 42) | def setup_module():
function teardown_module (line 108) | def teardown_module():
function test_load_empty_lfw_people (line 116) | def test_load_empty_lfw_people():
function test_load_fake_lfw_people (line 121) | def test_load_fake_lfw_people():
function test_load_fake_lfw_people_too_restrictive (line 179) | def test_load_fake_lfw_people_too_restrictive():
function test_load_empty_lfw_pairs (line 188) | def test_load_empty_lfw_pairs():
function test_load_fake_lfw_pairs (line 193) | def test_load_fake_lfw_pairs():
FILE: sklearn/datasets/tests/test_olivetti_faces.py
function test_olivetti_faces (line 13) | def test_olivetti_faces(fetch_olivetti_faces_fxt):
FILE: sklearn/datasets/tests/test_openml.py
function _test_features_list (line 42) | def _test_features_list(data_id):
function _fetch_dataset_from_openml (line 91) | def _fetch_dataset_from_openml(
class _MockHTTPResponse (line 186) | class _MockHTTPResponse:
method __init__ (line 187) | def __init__(self, data, is_gzip):
method read (line 191) | def read(self, amt=-1):
method close (line 194) | def close(self):
method info (line 197) | def info(self):
method __iter__ (line 202) | def __iter__(self):
method __enter__ (line 205) | def __enter__(self):
method __exit__ (line 208) | def __exit__(self, exc_type, exc_val, exc_tb):
function _monkey_patch_webbased_functions (line 212) | def _monkey_patch_webbased_functions(context, data_id, gzip_response):
function test_feature_to_dtype (line 342) | def test_feature_to_dtype(feature, expected_dtype):
function test_feature_to_dtype_error (line 349) | def test_feature_to_dtype_error(feature):
function test_fetch_openml_iris_pandas (line 358) | def test_fetch_openml_iris_pandas(monkeypatch):
function test_fetch_openml_iris_pandas_equal_to_no_frame (line 403) | def test_fetch_openml_iris_pandas_equal_to_no_frame(monkeypatch):
function test_fetch_openml_iris_multitarget_pandas (line 425) | def test_fetch_openml_iris_multitarget_pandas(monkeypatch):
function test_fetch_openml_anneal_pandas (line 470) | def test_fetch_openml_anneal_pandas(monkeypatch):
function test_fetch_openml_cpu_pandas (line 512) | def test_fetch_openml_cpu_pandas(monkeypatch):
function test_fetch_openml_australian_pandas_error_sparse (line 581) | def test_fetch_openml_australian_pandas_error_sparse(monkeypatch):
function test_fetch_openml_as_frame_auto (line 594) | def test_fetch_openml_as_frame_auto(monkeypatch):
function test_convert_arff_data_dataframe_warning_low_memory_pandas (line 611) | def test_convert_arff_data_dataframe_warning_low_memory_pandas(monkeypat...
function test_fetch_openml_adultcensus_pandas_return_X_y (line 626) | def test_fetch_openml_adultcensus_pandas_return_X_y(monkeypatch):
function test_fetch_openml_adultcensus_pandas (line 657) | def test_fetch_openml_adultcensus_pandas(monkeypatch):
function test_fetch_openml_miceprotein_pandas (line 697) | def test_fetch_openml_miceprotein_pandas(monkeypatch):
function test_fetch_openml_emotions_pandas (line 741) | def test_fetch_openml_emotions_pandas(monkeypatch):
function test_fetch_openml_titanic_pandas (line 790) | def test_fetch_openml_titanic_pandas(monkeypatch):
function test_fetch_openml_iris (line 872) | def test_fetch_openml_iris(monkeypatch, gzip_response):
function test_decode_iris (line 888) | def test_decode_iris(monkeypatch):
function test_fetch_openml_iris_multitarget (line 895) | def test_fetch_openml_iris_multitarget(monkeypatch, gzip_response):
function test_fetch_openml_anneal (line 922) | def test_fetch_openml_anneal(monkeypatch, gzip_response):
function test_decode_anneal (line 948) | def test_decode_anneal(monkeypatch):
function test_fetch_openml_anneal_multitarget (line 955) | def test_fetch_openml_anneal_multitarget(monkeypatch, gzip_response):
function test_fetch_openml_cpu (line 982) | def test_fetch_openml_cpu(monkeypatch, gzip_response):
function test_decode_cpu (line 1007) | def test_decode_cpu(monkeypatch):
function test_fetch_openml_australian (line 1014) | def test_fetch_openml_australian(monkeypatch, gzip_response):
function test_fetch_openml_adultcensus (line 1048) | def test_fetch_openml_adultcensus(monkeypatch, gzip_response):
function test_fetch_openml_miceprotein (line 1075) | def test_fetch_openml_miceprotein(monkeypatch, gzip_response):
function test_fetch_openml_emotions (line 1105) | def test_fetch_openml_emotions(monkeypatch, gzip_response):
function test_decode_emotions (line 1138) | def test_decode_emotions(monkeypatch):
function test_open_openml_url_cache (line 1145) | def test_open_openml_url_cache(monkeypatch, gzip_response, tmpdir):
function test_open_openml_url_unlinks_local_path (line 1163) | def test_open_openml_url_unlinks_local_path(
function test_retry_with_clean_cache (line 1185) | def test_retry_with_clean_cache(tmpdir):
function test_retry_with_clean_cache_http_error (line 1208) | def test_retry_with_clean_cache_http_error(tmpdir):
function test_fetch_openml_cache (line 1225) | def test_fetch_openml_cache(monkeypatch, gzip_response, tmpdir):
function test_fetch_openml_notarget (line 1259) | def test_fetch_openml_notarget(monkeypatch, gzip_response):
function test_fetch_openml_inactive (line 1274) | def test_fetch_openml_inactive(monkeypatch, gzip_response):
function test_fetch_nonexiting (line 1291) | def test_fetch_nonexiting(monkeypatch, gzip_response):
function test_raises_illegal_multitarget (line 1302) | def test_raises_illegal_multitarget(monkeypatch, gzip_response):
function test_warn_ignore_attribute (line 1313) | def test_warn_ignore_attribute(monkeypatch, gzip_response):
function test_string_attribute_without_dataframe (line 1353) | def test_string_attribute_without_dataframe(monkeypatch, gzip_response):
function test_dataset_with_openml_error (line 1366) | def test_dataset_with_openml_error(monkeypatch, gzip_response):
function test_dataset_with_openml_warning (line 1375) | def test_dataset_with_openml_warning(monkeypatch, gzip_response):
function test_illegal_column (line 1384) | def test_illegal_column(monkeypatch, gzip_response):
function test_fetch_openml_raises_missing_values_target (line 1396) | def test_fetch_openml_raises_missing_values_target(monkeypatch, gzip_res...
function test_fetch_openml_raises_illegal_argument (line 1404) | def test_fetch_openml_raises_illegal_argument():
function test_fetch_openml_with_ignored_feature (line 1422) | def test_fetch_openml_with_ignored_feature(monkeypatch, gzip_response):
function test_fetch_openml_verify_checksum (line 1442) | def test_fetch_openml_verify_checksum(monkeypatch, as_frame, cache, tmpd...
function test_convert_arff_data_type (line 1484) | def test_convert_arff_data_type():
function test_missing_values_pandas (line 1503) | def test_missing_values_pandas(monkeypatch):
FILE: sklearn/datasets/tests/test_rcv1.py
function test_fetch_rcv1 (line 13) | def test_fetch_rcv1(fetch_rcv1_fxt):
FILE: sklearn/datasets/tests/test_samples_generator.py
function test_make_classification (line 35) | def test_make_classification():
function test_make_classification_informative_features (line 83) | def test_make_classification_informative_features():
function test_make_classification_weights_type (line 208) | def test_make_classification_weights_type(weights, err_type, err_msg):
function test_make_classification_weights_array_or_list_ok (line 214) | def test_make_classification_weights_array_or_list_ok(kwargs):
function test_make_multilabel_classification_return_sequences (line 221) | def test_make_multilabel_classification_return_sequences():
function test_make_multilabel_classification_return_indicator (line 238) | def test_make_multilabel_classification_return_indicator():
function test_make_multilabel_classification_return_indicator_sparse (line 269) | def test_make_multilabel_classification_return_indicator_sparse():
function test_make_multilabel_classification_valid_arguments (line 291) | def test_make_multilabel_classification_valid_arguments(params, err_msg):
function test_make_hastie_10_2 (line 296) | def test_make_hastie_10_2():
function test_make_regression (line 303) | def test_make_regression():
function test_make_regression_multitarget (line 328) | def test_make_regression_multitarget():
function test_make_blobs (line 348) | def test_make_blobs():
function test_make_blobs_n_samples_list (line 366) | def test_make_blobs_n_samples_list():
function test_make_blobs_n_samples_list_with_centers (line 376) | def test_make_blobs_n_samples_list_with_centers():
function test_make_blobs_n_samples_centers_none (line 395) | def test_make_blobs_n_samples_centers_none(n_samples):
function test_make_blobs_return_centers (line 405) | def test_make_blobs_return_centers():
function test_make_blobs_error (line 415) | def test_make_blobs_error():
function test_make_friedman1 (line 438) | def test_make_friedman1():
function test_make_friedman2 (line 453) | def test_make_friedman2():
function test_make_friedman3 (line 464) | def test_make_friedman3():
function test_make_low_rank_matrix (line 475) | def test_make_low_rank_matrix():
function test_make_sparse_coded_signal (line 492) | def test_make_sparse_coded_signal():
function test_make_sparse_uncorrelated (line 505) | def test_make_sparse_uncorrelated():
function test_make_spd_matrix (line 512) | def test_make_spd_matrix():
function test_make_swiss_roll (line 527) | def test_make_swiss_roll(hole):
function test_make_s_curve (line 536) | def test_make_s_curve():
function test_make_biclusters (line 545) | def test_make_biclusters():
function test_make_checkerboard (line 565) | def test_make_checkerboard():
function test_make_moons (line 592) | def test_make_moons():
function test_make_moons_unbalanced (line 602) | def test_make_moons_unbalanced():
function test_make_circles (line 623) | def test_make_circles():
function test_make_circles_unbalanced (line 656) | def test_make_circles_unbalanced():
FILE: sklearn/datasets/tests/test_svmlight_format.py
function _load_svmlight_local_test_file (line 30) | def _load_svmlight_local_test_file(filename, **kwargs):
function test_load_svmlight_file (line 38) | def test_load_svmlight_file():
function test_load_svmlight_file_fd (line 74) | def test_load_svmlight_file_fd():
function test_load_svmlight_file_multilabel (line 92) | def test_load_svmlight_file_multilabel():
function test_load_svmlight_files (line 97) | def test_load_svmlight_files():
function test_load_svmlight_file_n_features (line 116) | def test_load_svmlight_file_n_features():
function test_load_compressed (line 134) | def test_load_compressed():
function test_load_invalid_file (line 162) | def test_load_invalid_file():
function test_load_invalid_order_file (line 167) | def test_load_invalid_order_file():
function test_load_zero_based (line 172) | def test_load_zero_based():
function test_load_zero_based_auto (line 178) | def test_load_zero_based_auto():
function test_load_with_qid (line 193) | def test_load_with_qid():
function test_load_large_qid (line 213) | def test_load_large_qid():
function test_load_invalid_file2 (line 228) | def test_load_invalid_file2():
function test_not_a_filename (line 236) | def test_not_a_filename():
function test_invalid_filename (line 243) | def test_invalid_filename():
function test_dump (line 248) | def test_dump():
function test_dump_multilabel (line 317) | def test_dump_multilabel():
function test_dump_concise (line 331) | def test_dump_concise():
function test_dump_comment (line 362) | def test_dump_comment():
function test_dump_invalid (line 395) | def test_dump_invalid():
function test_dump_query_id (line 408) | def test_dump_query_id():
function test_load_with_long_qid (line 423) | def test_load_with_long_qid():
function test_load_zeros (line 459) | def test_load_zeros():
function test_load_with_offsets (line 475) | def test_load_with_offsets(sparsity, n_samples, n_features):
function test_load_offset_exhaustive_splits (line 511) | def test_load_offset_exhaustive_splits():
function test_load_with_offsets_error (line 553) | def test_load_with_offsets_error():
FILE: sklearn/decomposition/_base.py
class _BasePCA (line 19) | class _BasePCA(
method get_covariance (line 28) | def get_covariance(self):
method get_precision (line 49) | def get_precision(self):
method fit (line 82) | def fit(self, X, y=None):
method transform (line 99) | def transform(self, X):
method inverse_transform (line 127) | def inverse_transform(self, X):
method _n_features_out (line 161) | def _n_features_out(self):
FILE: sklearn/decomposition/_dict_learning.py
function _check_positive_coding (line 26) | def _check_positive_coding(method, positive):
function _sparse_encode (line 33) | def _sparse_encode(
function sparse_encode (line 226) | def sparse_encode(
function _update_dict (line 410) | def _update_dict(
function dict_learning (line 490) | def dict_learning(
function dict_learning_online (line 720) | def dict_learning_online(
class _BaseSparseCoding (line 1017) | class _BaseSparseCoding(_ClassNamePrefixFeaturesOutMixin, TransformerMix...
method __init__ (line 1020) | def __init__(
method _transform (line 1038) | def _transform(self, X, dictionary):
method transform (line 1080) | def transform(self, X):
class SparseCoder (line 1101) | class SparseCoder(_BaseSparseCoding, BaseEstimator):
method __init__ (line 1230) | def __init__(
method fit (line 1253) | def fit(self, X, y=None):
method components_ (line 1280) | def components_(self):
method transform (line 1283) | def transform(self, X, y=None):
method _more_tags (line 1305) | def _more_tags(self):
method n_components_ (line 1309) | def n_components_(self):
method n_features_in_ (line 1314) | def n_features_in_(self):
method _n_features_out (line 1319) | def _n_features_out(self):
class DictionaryLearning (line 1324) | class DictionaryLearning(_BaseSparseCoding, BaseEstimator):
method __init__ (line 1508) | def __init__(
method fit (line 1550) | def fit(self, X, y=None):
method _n_features_out (line 1596) | def _n_features_out(self):
class MiniBatchDictionaryLearning (line 1601) | class MiniBatchDictionaryLearning(_BaseSparseCoding, BaseEstimator):
method __init__ (line 1789) | def __init__(
method fit (line 1832) | def fit(self, X, y=None):
method partial_fit (line 1879) | def partial_fit(self, X, y=None, iter_offset=None):
method _n_features_out (line 1941) | def _n_features_out(self):
FILE: sklearn/decomposition/_factor_analysis.py
class FactorAnalysis (line 35) | class FactorAnalysis(_ClassNamePrefixFeaturesOutMixin, TransformerMixin,...
method __init__ (line 162) | def __init__(
method fit (line 191) | def fit(self, X, y=None):
method transform (line 303) | def transform(self, X):
method get_covariance (line 333) | def get_covariance(self):
method get_precision (line 349) | def get_precision(self):
method score_samples (line 377) | def score_samples(self, X):
method score (line 399) | def score(self, X, y=None):
method _rotate (line 417) | def _rotate(self, components, n_components=None, tol=1e-6):
method _n_features_out (line 430) | def _n_features_out(self):
function _ortho_rotation (line 435) | def _ortho_rotation(components, method="varimax", tol=1e-6, max_iter=100):
FILE: sklearn/decomposition/_fastica.py
function _gs_decorrelation (line 27) | def _gs_decorrelation(w, W, j):
function _sym_decorrelation (line 52) | def _sym_decorrelation(W):
function _ica_def (line 62) | def _ica_def(X, tol, g, fun_args, max_iter, w_init):
function _ica_par (line 97) | def _ica_par(X, tol, g, fun_args, max_iter, w_init):
function _logcosh (line 127) | def _logcosh(x, fun_args=None):
function _exp (line 139) | def _exp(x, fun_args):
function _cube (line 146) | def _cube(x, fun_args):
function fastica (line 150) | def fastica(
class FastICA (line 318) | class FastICA(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEs...
method __init__ (line 439) | def __init__(
method _fit (line 463) | def _fit(self, X, compute_sources=False):
method fit_transform (line 632) | def fit_transform(self, X, y=None):
method fit (line 652) | def fit(self, X, y=None):
method transform (line 672) | def transform(self, X, copy=True):
method inverse_transform (line 700) | def inverse_transform(self, X, copy=True):
method _n_features_out (line 726) | def _n_features_out(self):
FILE: sklearn/decomposition/_incremental_pca.py
class IncrementalPCA (line 15) | class IncrementalPCA(_BasePCA):
method __init__ (line 181) | def __init__(self, n_components=None, *, whiten=False, copy=True, batc...
method fit (line 187) | def fit(self, X, y=None):
method partial_fit (line 236) | def partial_fit(self, X, y=None, check_input=True):
method transform (line 353) | def transform(self, X):
FILE: sklearn/decomposition/_kernel_pca.py
class KernelPCA (line 24) | class KernelPCA(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, Base...
method __init__ (line 240) | def __init__(
method _pairwise (line 284) | def _pairwise(self):
method lambdas_ (line 294) | def lambdas_(self):
method alphas_ (line 303) | def alphas_(self):
method _get_kernel (line 306) | def _get_kernel(self, X, Y=None):
method _fit_transform (line 315) | def _fit_transform(self, K):
method _fit_inverse_transform (line 401) | def _fit_inverse_transform(self, X_transformed, X):
method fit (line 413) | def fit(self, X, y=None):
method fit_transform (line 446) | def fit_transform(self, X, y=None, **params):
method transform (line 477) | def transform(self, X):
method inverse_transform (line 507) | def inverse_transform(self, X):
method _more_tags (line 558) | def _more_tags(self):
method _n_features_out (line 565) | def _n_features_out(self):
FILE: sklearn/decomposition/_lda.py
function _update_doc_distribution (line 34) | def _update_doc_distribution(
class LatentDirichletAllocation (line 141) | class LatentDirichletAllocation(
method __init__ (line 323) | def __init__(
method _check_params (line 360) | def _check_params(self):
method _init_latent_vars (line 380) | def _init_latent_vars(self, n_features):
method _e_step (line 409) | def _e_step(self, X, cal_sstats, random_init, parallel=None):
method _em_step (line 475) | def _em_step(self, X, total_samples, batch_update, parallel=None):
method _more_tags (line 529) | def _more_tags(self):
method _check_non_neg_array (line 532) | def _check_non_neg_array(self, X, reset_n_features, whom):
method partial_fit (line 546) | def partial_fit(self, X, y=None):
method fit (line 593) | def fit(self, X, y=None):
method _unnormalized_transform (line 676) | def _unnormalized_transform(self, X):
method transform (line 693) | def transform(self, X):
method _approx_bound (line 717) | def _approx_bound(self, X, doc_topic_distr, sub_sampling):
method score (line 796) | def score(self, X, y=None):
method _perplexity_precomp_distr (line 821) | def _perplexity_precomp_distr(self, X, doc_topic_distr=None, sub_sampl...
method perplexity (line 865) | def perplexity(self, X, sub_sampling=False):
method _n_features_out (line 894) | def _n_features_out(self):
FILE: sklearn/decomposition/_nmf.py
function norm (line 30) | def norm(x):
function trace_dot (line 43) | def trace_dot(X, Y):
function _check_init (line 56) | def _check_init(A, shape, whom):
function _beta_divergence (line 68) | def _beta_divergence(X, W, H, beta, square_root=False):
function _special_sparse_dot (line 174) | def _special_sparse_dot(W, H, X):
function _compute_regularization (line 195) | def _compute_regularization(alpha, alpha_W, alpha_H, l1_ratio, regulariz...
function _beta_loss_to_float (line 218) | def _beta_loss_to_float(beta_loss):
function _initialize_nmf (line 232) | def _initialize_nmf(X, n_components, init=None, eps=1e-6, random_state=N...
function _update_coordinate_descent (line 383) | def _update_coordinate_descent(X, W, Ht, l1_reg, l2_reg, shuffle, random...
function _fit_coordinate_descent (line 413) | def _fit_coordinate_descent(
function _multiplicative_update_w (line 533) | def _multiplicative_update_w(
function _multiplicative_update_h (line 639) | def _multiplicative_update_h(X, W, H, beta_loss, l1_reg_H, l2_reg_H, gam...
function _fit_multiplicative_update (line 721) | def _fit_multiplicative_update(
function non_negative_factorization (line 870) | def non_negative_factorization(
class NMF (line 1112) | class NMF(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstima...
method __init__ (line 1334) | def __init__(
method _more_tags (line 1367) | def _more_tags(self):
method _check_params (line 1370) | def _check_params(self, X):
method _check_w_h (line 1462) | def _check_w_h(self, X, W, H, update_H):
method _scale_regularization (line 1493) | def _scale_regularization(self, X):
method fit_transform (line 1512) | def fit_transform(self, X, y=None, W=None, H=None):
method _fit_transform (line 1554) | def _fit_transform(self, X, y=None, W=None, H=None, update_H=True):
method fit (line 1650) | def fit(self, X, y=None, **params):
method transform (line 1674) | def transform(self, X):
method inverse_transform (line 1698) | def inverse_transform(self, W):
method _n_features_out (line 1717) | def _n_features_out(self):
FILE: sklearn/decomposition/_pca.py
function _assess_dimension (line 30) | def _assess_dimension(spectrum, rank, n_samples):
function _infer_dimension (line 104) | def _infer_dimension(spectrum, n_samples):
class PCA (line 116) | class PCA(_BasePCA):
method __init__ (line 354) | def __init__(
method fit (line 375) | def fit(self, X, y=None):
method fit_transform (line 402) | def fit_transform(self, X, y=None):
method _fit (line 436) | def _fit(self, X):
method _fit_full (line 482) | def _fit_full(self, X, n_components):
method _fit_truncated (line 548) | def _fit_truncated(self, X, n_components, svd_solver):
method score_samples (line 624) | def score_samples(self, X):
method score (line 651) | def score(self, X, y=None):
method _more_tags (line 673) | def _more_tags(self):
FILE: sklearn/decomposition/_sparse_pca.py
class SparsePCA (line 14) | class SparsePCA(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, Base...
method __init__ (line 128) | def __init__(
method fit (line 155) | def fit(self, X, y=None):
method transform (line 207) | def transform(self, X):
method _n_features_out (line 240) | def _n_features_out(self):
class MiniBatchSparsePCA (line 245) | class MiniBatchSparsePCA(SparsePCA):
method __init__ (line 357) | def __init__(
method fit (line 386) | def fit(self, X, y=None):
FILE: sklearn/decomposition/_truncated_svd.py
class TruncatedSVD (line 24) | class TruncatedSVD(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, B...
method __init__ (line 142) | def __init__(
method fit (line 157) | def fit(self, X, y=None):
method fit_transform (line 176) | def fit_transform(self, X, y=None):
method transform (line 239) | def transform(self, X):
method inverse_transform (line 256) | def inverse_transform(self, X):
method _more_tags (line 274) | def _more_tags(self):
method _n_features_out (line 278) | def _n_features_out(self):
FILE: sklearn/decomposition/setup.py
function configuration (line 6) | def configuration(parent_package="", top_path=None):
FILE: sklearn/decomposition/tests/test_dict_learning.py
function test_sparse_encode_shapes_omp (line 37) | def test_sparse_encode_shapes_omp():
function test_dict_learning_shapes (line 48) | def test_dict_learning_shapes():
function test_dict_learning_overcomplete (line 59) | def test_dict_learning_overcomplete():
function test_max_iter (line 65) | def test_max_iter():
function test_dict_learning_lars_positive_parameter (line 122) | def test_dict_learning_lars_positive_parameter():
function test_dict_learning_positivity (line 140) | def test_dict_learning_positivity(transform_algorithm, positive_code, po...
function test_dict_learning_lars_dict_positivity (line 163) | def test_dict_learning_lars_dict_positivity(positive_dict):
function test_dict_learning_lars_code_positivity (line 179) | def test_dict_learning_lars_code_positivity():
function test_dict_learning_reconstruction (line 195) | def test_dict_learning_reconstruction():
function test_dict_learning_reconstruction_parallel (line 211) | def test_dict_learning_reconstruction_parallel():
function test_dict_learning_lassocd_readonly_data (line 229) | def test_dict_learning_lassocd_readonly_data():
function test_dict_learning_nonzero_coefs (line 246) | def test_dict_learning_nonzero_coefs():
function test_dict_learning_unknown_fit_algorithm (line 262) | def test_dict_learning_unknown_fit_algorithm():
function test_dict_learning_split (line 269) | def test_dict_learning_split():
function test_dict_learning_online_shapes (line 283) | def test_dict_learning_online_shapes():
function test_dict_learning_online_lars_positive_parameter (line 294) | def test_dict_learning_online_lars_positive_parameter():
function test_minibatch_dictionary_learning_positivity (line 311) | def test_minibatch_dictionary_learning_positivity(
function test_minibatch_dictionary_learning_lars (line 336) | def test_minibatch_dictionary_learning_lars(positive_dict):
function test_dict_learning_online_positivity (line 355) | def test_dict_learning_online_positivity(positive_code, positive_dict):
function test_dict_learning_online_verbosity (line 378) | def test_dict_learning_online_verbosity():
function test_dict_learning_online_estimator_shapes (line 407) | def test_dict_learning_online_estimator_shapes():
function test_dict_learning_online_overcomplete (line 414) | def test_dict_learning_online_overcomplete():
function test_dict_learning_online_initialization (line 420) | def test_dict_learning_online_initialization():
function test_dict_learning_online_readonly_initialization (line 430) | def test_dict_learning_online_readonly_initialization():
function test_dict_learning_online_partial_fit (line 440) | def test_dict_learning_online_partial_fit():
function test_dict_learning_iter_offset (line 465) | def test_dict_learning_iter_offset():
function test_sparse_encode_shapes (line 482) | def test_sparse_encode_shapes():
function test_sparse_encode_positivity (line 494) | def test_sparse_encode_positivity(algo, positive):
function test_sparse_encode_unavailable_positivity (line 507) | def test_sparse_encode_unavailable_positivity(algo):
function test_sparse_encode_input (line 518) | def test_sparse_encode_input():
function test_sparse_encode_error (line 530) | def test_sparse_encode_error():
function test_sparse_encode_error_default_sparsity (line 540) | def test_sparse_encode_error_default_sparsity():
function test_unknown_method (line 548) | def test_unknown_method():
function test_sparse_coder_estimator (line 556) | def test_sparse_coder_estimator():
function test_sparse_coder_estimator_clone (line 568) | def test_sparse_coder_estimator_clone():
function test_sparse_coder_parallel_mmap (line 586) | def test_sparse_coder_parallel_mmap():
function test_sparse_coder_common_transformer (line 605) | def test_sparse_coder_common_transformer():
function test_sparse_coder_deprecation (line 622) | def test_sparse_coder_deprecation():
function test_sparse_coder_n_features_in (line 633) | def test_sparse_coder_n_features_in():
function test_update_dict (line 639) | def test_update_dict():
function test_warning_default_transform_alpha (line 663) | def test_warning_default_transform_alpha(Estimator):
function test_get_feature_names_out (line 674) | def test_get_feature_names_out(estimator):
FILE: sklearn/decomposition/tests/test_factor_analysis.py
function test_factor_analysis (line 20) | def test_factor_analysis():
FILE: sklearn/decomposition/tests/test_fastica.py
function center_and_norm (line 19) | def center_and_norm(x, axis=-1):
function test_gs (line 35) | def test_gs():
function test_fastica_simple (line 55) | def test_fastica_simple(add_noise, seed):
function test_fastica_nowhiten (line 142) | def test_fastica_nowhiten():
function test_fastica_convergence_fail (line 153) | def test_fastica_convergence_fail():
function test_non_square_fastica (line 184) | def test_non_square_fastica(add_noise):
function test_fit_transform (line 229) | def test_fit_transform():
function test_inverse_transform (line 267) | def test_inverse_transform(whiten, n_components, expected_mixing_shape):
function test_fastica_errors (line 288) | def test_fastica_errors():
function test_fastica_whiten_unit_variance (line 309) | def test_fastica_whiten_unit_variance():
function test_fastica_whiten_default_value_deprecation (line 324) | def test_fastica_whiten_default_value_deprecation(ica):
function test_fastica_whiten_backwards_compatibility (line 336) | def test_fastica_whiten_backwards_compatibility():
function test_fastica_output_shape (line 375) | def test_fastica_output_shape(whiten, return_X_mean, return_n_iter):
FILE: sklearn/decomposition/tests/test_incremental_pca.py
function test_incremental_pca (line 18) | def test_incremental_pca():
function test_incremental_pca_sparse (line 48) | def test_incremental_pca_sparse(matrix_class):
function test_incremental_pca_check_projection (line 86) | def test_incremental_pca_check_projection():
function test_incremental_pca_inverse (line 107) | def test_incremental_pca_inverse():
function test_incremental_pca_validation (line 123) | def test_incremental_pca_validation():
function test_n_components_none (line 152) | def test_n_components_none():
function test_incremental_pca_set_params (line 170) | def test_incremental_pca_set_params():
function test_incremental_pca_num_features_change (line 193) | def test_incremental_pca_num_features_change():
function test_incremental_pca_batch_signs (line 205) | def test_incremental_pca_batch_signs():
function test_incremental_pca_batch_values (line 221) | def test_incremental_pca_batch_values():
function test_incremental_pca_batch_rank (line 237) | def test_incremental_pca_batch_rank():
function test_incremental_pca_partial_fit (line 253) | def test_incremental_pca_partial_fit():
function test_incremental_pca_against_pca_iris (line 273) | def test_incremental_pca_against_pca_iris():
function test_incremental_pca_against_pca_random_data (line 283) | def test_incremental_pca_against_pca_random_data():
function test_explained_variances (line 296) | def test_explained_variances():
function test_singular_values (line 315) | def test_singular_values():
function test_whitening (line 372) | def test_whitening():
function test_incremental_pca_partial_fit_float_division (line 393) | def test_incremental_pca_partial_fit_float_division():
function test_incremental_pca_fit_overflow_error (line 418) | def test_incremental_pca_fit_overflow_error():
function test_incremental_pca_feature_names_out (line 433) | def test_incremental_pca_feature_names_out():
FILE: sklearn/decomposition/tests/test_kernel_pca.py
function test_kernel_pca (line 23) | def test_kernel_pca():
function test_kernel_pca_invalid_solver (line 69) | def test_kernel_pca_invalid_solver():
function test_kernel_pca_invalid_parameters (line 75) | def test_kernel_pca_invalid_parameters():
function test_kernel_pca_consistent_transform (line 89) | def test_kernel_pca_consistent_transform():
function test_kernel_pca_deterministic_output (line 108) | def test_kernel_pca_deterministic_output():
function test_kernel_pca_sparse (line 125) | def test_kernel_pca_sparse():
function test_kernel_pca_linear_kernel (line 164) | def test_kernel_pca_linear_kernel(solver, n_features):
function test_kernel_pca_n_components (line 188) | def test_kernel_pca_n_components():
function test_remove_zero_eig (line 206) | def test_remove_zero_eig():
function test_leave_zero_eig (line 228) | def test_leave_zero_eig():
function test_kernel_pca_precomputed (line 255) | def test_kernel_pca_precomputed():
function test_kernel_pca_precomputed_non_symmetric (line 294) | def test_kernel_pca_precomputed_non_symmetric(solver):
function test_kernel_pca_invalid_kernel (line 320) | def test_kernel_pca_invalid_kernel():
function test_gridsearch_pipeline (line 332) | def test_gridsearch_pipeline():
function test_gridsearch_pipeline_precomputed (line 347) | def test_gridsearch_pipeline_precomputed():
function test_nested_circles (line 363) | def test_nested_circles():
function test_kernel_conditioning (line 391) | def test_kernel_conditioning():
function test_precomputed_kernel_not_psd (line 408) | def test_precomputed_kernel_not_psd(solver):
function test_kernel_pca_solvers_equivalence (line 460) | def test_kernel_pca_solvers_equivalence(n_components):
function test_kernel_pca_inverse_transform_reconstruction (line 496) | def test_kernel_pca_inverse_transform_reconstruction():
function test_kernel_pca_raise_not_fitted_error (line 513) | def test_kernel_pca_raise_not_fitted_error():
function test_32_64_decomposition_shape (line 521) | def test_32_64_decomposition_shape():
function test_kernel_pcc_pairwise_is_deprecated (line 539) | def test_kernel_pcc_pairwise_is_deprecated():
function test_kernel_pca_lambdas_deprecated (line 551) | def test_kernel_pca_lambdas_deprecated():
function test_kernel_pca_alphas_deprecated (line 560) | def test_kernel_pca_alphas_deprecated():
function test_kernel_pca_feature_names_out (line 568) | def test_kernel_pca_feature_names_out():
FILE: sklearn/decomposition/tests/test_nmf.py
function test_convergence_warning (line 24) | def test_convergence_warning(solver):
function test_initialize_nn_output (line 33) | def test_initialize_nn_output():
function test_parameter_checking (line 42) | def test_parameter_checking():
function test_initialize_close (line 85) | def test_initialize_close():
function test_initialize_variants (line 97) | def test_initialize_variants():
function test_nmf_fit_nn_output (line 117) | def test_nmf_fit_nn_output(solver, init, alpha_W, alpha_H):
function test_nmf_fit_close (line 133) | def test_nmf_fit_close(solver):
function test_nmf_transform (line 148) | def test_nmf_transform(solver):
function test_nmf_transform_custom_init (line 164) | def test_nmf_transform_custom_init():
function test_nmf_inverse_transform (line 179) | def test_nmf_inverse_transform(solver):
function test_n_components_greater_n_features (line 195) | def test_n_components_greater_n_features():
function test_nmf_sparse_input (line 205) | def test_nmf_sparse_input(solver, alpha_W, alpha_H):
function test_nmf_sparse_transform (line 234) | def test_nmf_sparse_transform():
function test_non_negative_factorization_consistency (line 254) | def test_non_negative_factorization_consistency(init, solver, alpha_W, a...
function test_non_negative_factorization_checking (line 297) | def test_non_negative_factorization_checking():
function _beta_divergence_dense (line 328) | def _beta_divergence_dense(X, W, H, beta):
function test_beta_divergence (line 358) | def test_beta_divergence():
function test_special_sparse_dot (line 381) | def test_special_sparse_dot():
function test_nmf_multiplicative_update_sparse (line 409) | def test_nmf_multiplicative_update_sparse():
function test_nmf_negative_beta_loss (line 487) | def test_nmf_negative_beta_loss():
function test_nmf_regularization (line 524) | def test_nmf_regularization(solver):
function test_nmf_decreasing (line 595) | def test_nmf_decreasing(solver):
function test_nmf_underflow (line 647) | def test_nmf_underflow():
function test_nmf_dtype_match (line 674) | def test_nmf_dtype_match(dtype_in, dtype_out, solver, alpha_W, alpha_H):
function test_nmf_float32_float64_consistency (line 686) | def test_nmf_float32_float64_consistency(solver):
function test_nmf_custom_init_dtype_error (line 699) | def test_nmf_custom_init_dtype_error():
function test_feature_names_out (line 714) | def test_feature_names_out():
FILE: sklearn/decomposition/tests/test_online_lda.py
function _build_sparse_mtx (line 26) | def _build_sparse_mtx():
function test_lda_default_prior_params (line 37) | def test_lda_default_prior_params():
function test_lda_fit_batch (line 54) | def test_lda_fit_batch():
function test_lda_fit_online (line 73) | def test_lda_fit_online():
function test_lda_partial_fit (line 93) | def test_lda_partial_fit():
function test_lda_dense_input (line 113) | def test_lda_dense_input():
function test_lda_transform (line 129) | def test_lda_transform():
function test_lda_fit_transform (line 142) | def test_lda_fit_transform(method):
function test_invalid_params (line 155) | def test_invalid_params():
function test_lda_negative_input (line 171) | def test_lda_negative_input():
function test_lda_no_component_error (line 180) | def test_lda_no_component_error():
function test_lda_multi_jobs (line 196) | def test_lda_multi_jobs(method):
function test_lda_partial_fit_multi_jobs (line 216) | def test_lda_partial_fit_multi_jobs():
function test_lda_preplexity_mismatch (line 236) | def test_lda_preplexity_mismatch():
function test_lda_perplexity (line 260) | def test_lda_perplexity(method):
function test_lda_score (line 291) | def test_lda_score(method):
function test_perplexity_input_format (line 317) | def test_perplexity_input_format():
function test_lda_score_perplexity (line 334) | def test_lda_score_perplexity():
function test_lda_fit_perplexity (line 348) | def test_lda_fit_perplexity():
function test_lda_empty_docs (line 370) | def test_lda_empty_docs():
function test_dirichlet_expectation (line 380) | def test_dirichlet_expectation():
function check_verbosity (line 396) | def check_verbosity(verbose, evaluate_every, expected_lines, expected_pe...
function test_verbosity (line 429) | def test_verbosity(verbose, evaluate_every, expected_lines, expected_per...
function test_lda_feature_names_out (line 433) | def test_lda_feature_names_out():
FILE: sklearn/decomposition/tests/test_pca.py
function test_pca (line 21) | def test_pca(svd_solver, n_components):
function test_no_empty_slice_warning (line 41) | def test_no_empty_slice_warning():
function test_whitening (line 54) | def test_whitening(solver, copy):
function test_pca_explained_variance_equivalence_solver (line 108) | def test_pca_explained_variance_equivalence_solver(svd_solver):
function test_pca_explained_variance_empirical (line 138) | def test_pca_explained_variance_empirical(X, svd_solver):
function test_pca_singular_values_consistency (line 149) | def test_pca_singular_values_consistency(svd_solver):
function test_pca_singular_values (line 164) | def test_pca_singular_values(svd_solver):
function test_pca_check_projection (line 194) | def test_pca_check_projection(svd_solver):
function test_pca_check_projection_list (line 209) | def test_pca_check_projection_list(svd_solver):
function test_pca_inverse (line 221) | def test_pca_inverse(svd_solver, whiten):
function test_pca_validation (line 267) | def test_pca_validation(svd_solver, data, n_components, err_msg):
function test_n_components_none (line 303) | def test_n_components_none(data, solver, n_components_):
function test_n_components_mle (line 310) | def test_n_components_mle(svd_solver):
function test_n_components_mle_error (line 321) | def test_n_components_mle_error(svd_solver):
function test_pca_dim (line 335) | def test_pca_dim():
function test_infer_dim_1 (line 346) | def test_infer_dim_1():
function test_infer_dim_2 (line 363) | def test_infer_dim_2():
function test_infer_dim_3 (line 377) | def test_infer_dim_3():
function test_infer_dim_by_explained_variance (line 398) | def test_infer_dim_by_explained_variance(X, n_components, n_components_v...
function test_pca_score (line 406) | def test_pca_score(svd_solver):
function test_pca_score3 (line 427) | def test_pca_score3():
function test_pca_sanity_noise_variance (line 443) | def test_pca_sanity_noise_variance(svd_solver):
function test_pca_score_consistency_solvers (line 455) | def test_pca_score_consistency_solvers(svd_solver):
function test_pca_zero_noise_variance_edge_cases (line 467) | def test_pca_zero_noise_variance_edge_cases(svd_solver):
function test_pca_svd_solver_auto (line 494) | def test_pca_svd_solver_auto(data, n_components, expected_solver):
function test_pca_sparse_input (line 505) | def test_pca_sparse_input(svd_solver):
function test_pca_bad_solver (line 515) | def test_pca_bad_solver():
function test_pca_deterministic_output (line 523) | def test_pca_deterministic_output(svd_solver):
function test_pca_dtype_preservation (line 535) | def test_pca_dtype_preservation(svd_solver):
function check_pca_float_dtype_preservation (line 540) | def check_pca_float_dtype_preservation(svd_solver):
function check_pca_int_dtype_upcast_to_double (line 559) | def check_pca_int_dtype_upcast_to_double(svd_solver):
function test_pca_n_components_mostly_explained_variance_ratio (line 576) | def test_pca_n_components_mostly_explained_variance_ratio():
function test_assess_dimension_bad_rank (line 588) | def test_assess_dimension_bad_rank():
function test_small_eigenvalues_mle (line 597) | def test_small_eigenvalues_mle():
function test_mle_redundant_data (line 610) | def test_mle_redundant_data():
function test_fit_mle_too_few_samples (line 625) | def test_fit_mle_too_few_samples():
function test_mle_simple_case (line 638) | def test_mle_simple_case():
function test_assess_dimesion_rank_one (line 649) | def test_assess_dimesion_rank_one():
function test_pca_randomized_svd_n_oversamples (line 662) | def test_pca_randomized_svd_n_oversamples():
function test_pca_params_validation (line 703) | def test_pca_params_validation(params, err_type, err_msg):
function test_feature_names_out (line 711) | def test_feature_names_out():
FILE: sklearn/decomposition/tests/test_sparse_pca.py
function generate_toy_data (line 18) | def generate_toy_data(n_components, n_samples, image_size, random_state=...
function test_correct_shapes (line 44) | def test_correct_shapes():
function test_fit_transform (line 58) | def test_fit_transform():
function test_fit_transform_parallel (line 72) | def test_fit_transform_parallel():
function test_transform_nan (line 88) | def test_transform_nan():
function test_fit_transform_tall (line 98) | def test_fit_transform_tall():
function test_initialization (line 108) | def test_initialization():
function test_mini_batch_correct_shapes (line 119) | def test_mini_batch_correct_shapes():
function test_mini_batch_fit_transform (line 135) | def test_mini_batch_fit_transform():
function test_scaling_fit_transform (line 166) | def test_scaling_fit_transform():
function test_pca_vs_spca (line 176) | def test_pca_vs_spca():
function test_spca_n_components_ (line 196) | def test_spca_n_components_(SPCA, n_components):
function test_spca_feature_names_out (line 210) | def test_spca_feature_names_out(SPCA):
FILE: sklearn/decomposition/tests/test_truncated_svd.py
function X_sparse (line 16) | def X_sparse():
function test_solvers (line 26) | def test_solvers(X_sparse, solver, kind):
function test_attributes (line 43) | def test_attributes(n_components, X_sparse):
function test_too_many_components (line 51) | def test_too_many_components(algorithm, X_sparse):
function test_sparse_formats (line 60) | def test_sparse_formats(fmt, X_sparse):
function test_inverse_transform (line 71) | def test_inverse_transform(algo, X_sparse):
function test_integers (line 80) | def test_integers(X_sparse):
function test_explained_variance (line 91) | def test_explained_variance(X_sparse, kind, n_components, solver):
function test_explained_variance_components_10_20 (line 114) | def test_explained_variance_components_10_20(X_sparse, kind, solver):
function test_singular_values_consistency (line 133) | def test_singular_values_consistency(solver):
function test_singular_values_expected (line 156) | def test_singular_values_expected(solver):
function test_truncated_svd_eq_pca (line 176) | def test_truncated_svd_eq_pca(X_sparse):
function test_fit_transform (line 200) | def test_fit_transform(X_sparse, algorithm, tol, kind):
FILE: sklearn/discriminant_analysis.py
function _cov (line 30) | def _cov(X, shrinkage=None, covariance_estimator=None):
function _class_means (line 96) | def _class_means(X, y):
function _class_cov (line 120) | def _class_cov(X, y, priors, shrinkage=None, covariance_estimator=None):
class LinearDiscriminantAnalysis (line 167) | class LinearDiscriminantAnalysis(
method __init__ (line 312) | def __init__(
method _solve_lsqr (line 330) | def _solve_lsqr(self, X, y, shrinkage, covariance_estimator):
method _solve_eigen (line 386) | def _solve_eigen(self, X, y, shrinkage, covariance_estimator):
method _solve_svd (line 452) | def _solve_svd(self, X, y):
method fit (line 522) | def fit(self, X, y):
method transform (line 619) | def transform(self, X):
method predict_proba (line 646) | def predict_proba(self, X):
method predict_log_proba (line 668) | def predict_log_proba(self, X):
method decision_function (line 685) | def decision_function(self, X):
class QuadraticDiscriminantAnalysis (line 709) | class QuadraticDiscriminantAnalysis(ClassifierMixin, BaseEstimator):
method __init__ (line 810) | def __init__(
method fit (line 818) | def fit(self, X, y):
method _decision_function (line 893) | def _decision_function(self, X):
method decision_function (line 909) | def decision_function(self, X):
method predict (line 935) | def predict(self, X):
method predict_proba (line 955) | def predict_proba(self, X):
method p
Copy disabled (too large)
Download .json
Condensed preview — 1269 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (15,659K chars).
[
{
"path": ".binder/postBuild",
"chars": 1588,
"preview": "#!/bin/bash\n\nset -e\n\n# This script is called in a binder context. When this script is called, we are\n# inside a git chec"
},
{
"path": ".binder/requirements.txt",
"chars": 147,
"preview": "--extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple scikit-learn\n--pre\nmatplotlib\nscikit-image\npandas\nsp"
},
{
"path": ".circleci/artifact_path",
"chars": 20,
"preview": "0/doc/_changed.html\n"
},
{
"path": ".circleci/config.yml",
"chars": 5116,
"preview": "version: 2.1\n\njobs:\n doc-min-dependencies:\n docker:\n - image: circleci/python:3.7.7-buster\n environment:\n "
},
{
"path": ".codecov.yml",
"chars": 921,
"preview": "comment: false\n\ncoverage:\n status:\n project:\n default:\n # Commits pushed to main should not make the ove"
},
{
"path": ".coveragerc",
"chars": 150,
"preview": "[run]\nbranch = True\nsource = sklearn\nparallel = True\nomit =\n */sklearn/externals/*\n */sklearn/_build_utils/*\n *"
},
{
"path": ".git-blame-ignore-revs",
"chars": 690,
"preview": "# Since git version 2.23, git-blame has a feature to ignore\n# certain commits.\n#\n# This file contains a list of commits "
},
{
"path": ".gitattributes",
"chars": 34,
"preview": "/doc/whats_new/v*.rst merge=union\n"
},
{
"path": ".github/FUNDING.yml",
"chars": 687,
"preview": "# These are supported funding model platforms\n\ngithub: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [u"
},
{
"path": ".github/ISSUE_TEMPLATE/bug_report.yml",
"chars": 2559,
"preview": "name: Bug Report\ndescription: Create a report to help us reproduce and correct the bug\nlabels: ['Bug: triage']\n\nbody:\n- "
},
{
"path": ".github/ISSUE_TEMPLATE/config.yml",
"chars": 843,
"preview": "blank_issues_enabled: true\ncontact_links:\n - name: Discussions\n url: https://github.com/scikit-learn/scikit-learn/di"
},
{
"path": ".github/ISSUE_TEMPLATE/doc_improvement.yml",
"chars": 579,
"preview": "name: Documentation improvement\ndescription: Create a report to help us improve the documentation. Alternatively you can"
},
{
"path": ".github/ISSUE_TEMPLATE/feature_request.yml",
"chars": 765,
"preview": "name: Feature request\ndescription: Suggest a new algorithm, enhancement to an existing algorithm, etc.\nlabels: ['New Fea"
},
{
"path": ".github/PULL_REQUEST_TEMPLATE.md",
"chars": 1173,
"preview": "<!--\nThanks for contributing a pull request! Please ensure you have taken a look at\nthe contribution guidelines: https:/"
},
{
"path": ".github/labeler-file-extensions.yml",
"chars": 151,
"preview": "cython:\n- sklearn/**/*.pyx\n- sklearn/**/*.pxd\n- sklearn/**/*.pxi\n# Tempita templates\n- sklearn/**/*.pyx.tp\n- sklearn/**/"
},
{
"path": ".github/labeler-module.yml",
"chars": 1249,
"preview": "module:cluster:\n- sklearn/cluster/**/*\n\nmodule:common:\n- sklearn/common/**/*\n\nmodule:compose:\n- sklearn/compose/**/*\n\nmo"
},
{
"path": ".github/scripts/label_title_regex.py",
"chars": 638,
"preview": "\"\"\"Labels PRs based on title. Must be run in a github action with the\npull_request_target event.\"\"\"\nfrom github import G"
},
{
"path": ".github/workflows/assign.yml",
"chars": 800,
"preview": "\nname: Assign\non:\n issue_comment:\n types: created\n\njobs:\n one:\n runs-on: ubuntu-latest\n if: >-\n (github."
},
{
"path": ".github/workflows/check-changelog.yml",
"chars": 2960,
"preview": "name: Check Changelog\n# This check makes sure that the changelog is properly updated\n# when a PR introduces a change in "
},
{
"path": ".github/workflows/check-manifest.yml",
"chars": 480,
"preview": "name: \"Check Manifest\"\n\non:\n schedule:\n - cron: '0 0 * * *'\n\njobs:\n check:\n runs-on: ubuntu-latest\n steps:\n "
},
{
"path": ".github/workflows/labeler-module.yml",
"chars": 699,
"preview": "name: \"Pull Request Labeler\"\non: pull_request_target\n\njobs:\n triage:\n runs-on: ubuntu-latest\n steps:\n - uses: "
},
{
"path": ".github/workflows/labeler-title-regex.yml",
"chars": 522,
"preview": "name: Pull Request Regex Title Labeler\non:\n pull_request_target:\n types: [opened, edited]\n\npermissions:\n contents: "
},
{
"path": ".github/workflows/publish_pypi.yml",
"chars": 1586,
"preview": "name: Publish to Pypi\non:\n workflow_dispatch:\n inputs:\n version:\n description: 'Version upload to pypi'\n"
},
{
"path": ".github/workflows/twitter.yml",
"chars": 840,
"preview": "# Tweet the URL of a commit on @sklearn_commits whenever a push event\n# happens on the main branch\nname: Twitter Push No"
},
{
"path": ".github/workflows/unassign.yml",
"chars": 550,
"preview": "name: Unassign\n#Runs when a contributor has unassigned themselves from the issue and adds 'help wanted'\non:\n issues:\n "
},
{
"path": ".github/workflows/wheels.yml",
"chars": 5834,
"preview": "# Workflow to build and test wheels\nname: Wheel builder\n\non:\n schedule:\n # Nightly build at 3:42 A.M.\n - cron: \"4"
},
{
"path": ".gitignore",
"chars": 1055,
"preview": "*.pyc\n*.so\n*.pyd\n*~\n.#*\n*.lprof\n*.swp\n*.swo\n.DS_Store\nbuild\nsklearn/datasets/__config__.py\nsklearn/**/*.html\n\ndist/\nMANI"
},
{
"path": ".mailmap",
"chars": 7262,
"preview": "Alexandre Gramfort <alexandre.gramfort@inria.fr> <alexandre.gramfort@gmail.com>\nAlexandre Gramfort <alexandre.gramfort@i"
},
{
"path": ".pre-commit-config.yaml",
"chars": 547,
"preview": "repos:\n- repo: https://github.com/pre-commit/pre-commit-hooks\n rev: v2.3.0\n hooks:\n - id: check-yaml\n - "
},
{
"path": ".travis.yml",
"chars": 2460,
"preview": "# Make it explicit that we favor the\n# new container-based Travis workers\nlanguage: python\ndist: xenial\n\ncache:\n apt: t"
},
{
"path": "CODE_OF_CONDUCT.md",
"chars": 645,
"preview": "# Code of Conduct\n\nWe are a community based on openness, as well as friendly and didactic discussions.\n\nWe aspire to tre"
},
{
"path": "CONTRIBUTING.md",
"chars": 2109,
"preview": "\nContributing to scikit-learn\n============================\n\nThe latest contributing guide is available in the repository"
},
{
"path": "COPYING",
"chars": 1532,
"preview": "BSD 3-Clause License\n\nCopyright (c) 2007-2021 The scikit-learn developers.\nAll rights reserved.\n\nRedistribution and use "
},
{
"path": "MANIFEST.in",
"chars": 941,
"preview": "include *.rst\nrecursive-include doc *\nrecursive-include examples *\nrecursive-include sklearn *.c *.h *.pyx *.pxd *.pxi *"
},
{
"path": "Makefile",
"chars": 1634,
"preview": "# simple makefile to simplify repetitive build env management tasks under posix\n\n# caution: testing won't work on window"
},
{
"path": "README.rst",
"chars": 6972,
"preview": ".. -*- mode: rst -*-\n\n|Azure|_ |Travis|_ |Codecov|_ |CircleCI|_ |Nightly wheels|_ |Black|_ |PythonVersion|_ |PyPi|_ |DOI"
},
{
"path": "SECURITY.md",
"chars": 556,
"preview": "# Security Policy\n\n## Supported Versions\n\n| Version | Supported |\n| --------- | ------------------ |\n| 1.0.1 "
},
{
"path": "asv_benchmarks/.gitignore",
"chars": 66,
"preview": "*__pycache__*\nenv/\nhtml/\nresults/\nscikit-learn/\nbenchmarks/cache/\n"
},
{
"path": "asv_benchmarks/asv.conf.json",
"chars": 6732,
"preview": "{\n // The version of the config file format. Do not change, unless\n // you know what you are doing.\n \"version\""
},
{
"path": "asv_benchmarks/benchmarks/__init__.py",
"chars": 49,
"preview": "\"\"\"Benchmark suite for scikit-learn using ASV\"\"\"\n"
},
{
"path": "asv_benchmarks/benchmarks/cluster.py",
"chars": 2925,
"preview": "from sklearn.cluster import KMeans, MiniBatchKMeans\n\nfrom .common import Benchmark, Estimator, Predictor, Transformer\nfr"
},
{
"path": "asv_benchmarks/benchmarks/common.py",
"chars": 7341,
"preview": "import os\nimport json\nimport timeit\nimport pickle\nimport itertools\nfrom abc import ABC, abstractmethod\nfrom pathlib impo"
},
{
"path": "asv_benchmarks/benchmarks/config.json",
"chars": 1544,
"preview": "{\n // \"regular\": Bencharks are run on small to medium datasets. Each benchmark\n // is run multiple time"
},
{
"path": "asv_benchmarks/benchmarks/datasets.py",
"chars": 5144,
"preview": "import numpy as np\nimport scipy.sparse as sp\nfrom joblib import Memory\nfrom pathlib import Path\n\nfrom sklearn.decomposit"
},
{
"path": "asv_benchmarks/benchmarks/decomposition.py",
"chars": 2375,
"preview": "from sklearn.decomposition import PCA, DictionaryLearning, MiniBatchDictionaryLearning\n\nfrom .common import Benchmark, E"
},
{
"path": "asv_benchmarks/benchmarks/ensemble.py",
"chars": 2995,
"preview": "from sklearn.ensemble import (\n RandomForestClassifier,\n GradientBoostingClassifier,\n HistGradientBoostingClass"
},
{
"path": "asv_benchmarks/benchmarks/linear_model.py",
"chars": 6549,
"preview": "from sklearn.linear_model import (\n LogisticRegression,\n Ridge,\n ElasticNet,\n Lasso,\n LinearRegression,\n "
},
{
"path": "asv_benchmarks/benchmarks/manifold.py",
"chars": 820,
"preview": "from sklearn.manifold import TSNE\n\nfrom .common import Benchmark, Estimator\nfrom .datasets import _digits_dataset\n\n\nclas"
},
{
"path": "asv_benchmarks/benchmarks/metrics.py",
"chars": 1363,
"preview": "from sklearn.metrics.pairwise import pairwise_distances\n\nfrom .common import Benchmark\nfrom .datasets import _random_dat"
},
{
"path": "asv_benchmarks/benchmarks/model_selection.py",
"chars": 2371,
"preview": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.model_selection import GridSearchCV, cross_val_score\n\nf"
},
{
"path": "asv_benchmarks/benchmarks/neighbors.py",
"chars": 1140,
"preview": "from sklearn.neighbors import KNeighborsClassifier\n\nfrom .common import Benchmark, Estimator, Predictor\nfrom .datasets i"
},
{
"path": "asv_benchmarks/benchmarks/svm.py",
"chars": 762,
"preview": "from sklearn.svm import SVC\n\nfrom .common import Benchmark, Estimator, Predictor\nfrom .datasets import _synth_classifica"
},
{
"path": "asv_benchmarks/benchmarks/utils.py",
"chars": 1310,
"preview": "import numpy as np\n\nfrom sklearn.metrics import balanced_accuracy_score, r2_score\n\n\ndef neg_mean_inertia(X, labels, cent"
},
{
"path": "azure-pipelines.yml",
"chars": 8673,
"preview": "# Adapted from https://github.com/pandas-dev/pandas/blob/master/azure-pipelines.yml\nschedules:\n- cron: \"30 2 * * *\"\n di"
},
{
"path": "benchmarks/.gitignore",
"chars": 41,
"preview": "/bhtsne\n*.npy\n*.json\n/mnist_tsne_output/\n"
},
{
"path": "benchmarks/bench_20newsgroups.py",
"chars": 3165,
"preview": "from time import time\nimport argparse\nimport numpy as np\n\nfrom sklearn.dummy import DummyClassifier\n\nfrom sklearn.datase"
},
{
"path": "benchmarks/bench_covertype.py",
"chars": 7421,
"preview": "\"\"\"\n===========================\nCovertype dataset benchmark\n===========================\n\nBenchmark stochastic gradient d"
},
{
"path": "benchmarks/bench_feature_expansions.py",
"chars": 1773,
"preview": "import matplotlib.pyplot as plt\nimport numpy as np\nimport scipy.sparse as sparse\nfrom sklearn.preprocessing import Polyn"
},
{
"path": "benchmarks/bench_glm.py",
"chars": 1479,
"preview": "\"\"\"\nA comparison of different methods in GLM\n\nData comes from a random square matrix.\n\n\"\"\"\nfrom datetime import datetime"
},
{
"path": "benchmarks/bench_glmnet.py",
"chars": 3965,
"preview": "\"\"\"\nTo run this, you'll need to have installed.\n\n * glmnet-python\n * scikit-learn (of course)\n\nDoes two benchmarks\n\nFi"
},
{
"path": "benchmarks/bench_hist_gradient_boosting.py",
"chars": 9687,
"preview": "from time import time\nimport argparse\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nfrom sklearn.model_selection i"
},
{
"path": "benchmarks/bench_hist_gradient_boosting_adult.py",
"chars": 2998,
"preview": "import argparse\nfrom time import time\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.datasets import"
},
{
"path": "benchmarks/bench_hist_gradient_boosting_categorical_only.py",
"chars": 2621,
"preview": "import argparse\nfrom time import time\n\nfrom sklearn.preprocessing import KBinsDiscretizer\nfrom sklearn.datasets import m"
},
{
"path": "benchmarks/bench_hist_gradient_boosting_higgsboson.py",
"chars": 3815,
"preview": "from urllib.request import urlretrieve\nimport os\nfrom gzip import GzipFile\nfrom time import time\nimport argparse\n\nimport"
},
{
"path": "benchmarks/bench_hist_gradient_boosting_threading.py",
"chars": 11094,
"preview": "from time import time\nimport argparse\nimport os\nfrom pprint import pprint\n\nimport numpy as np\nfrom threadpoolctl import "
},
{
"path": "benchmarks/bench_isolation_forest.py",
"chars": 5463,
"preview": "\"\"\"\n==========================================\nIsolationForest benchmark\n==========================================\nA te"
},
{
"path": "benchmarks/bench_isotonic.py",
"chars": 3274,
"preview": "\"\"\"\nBenchmarks of isotonic regression performance.\n\nWe generate a synthetic dataset of size 10^n, for n in [min, max], a"
},
{
"path": "benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py",
"chars": 5463,
"preview": "\"\"\"\n=============================================================\nKernel PCA Solvers comparison benchmark: time vs n_com"
},
{
"path": "benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py",
"chars": 5731,
"preview": "\"\"\"\n==========================================================\nKernel PCA Solvers comparison benchmark: time vs n_sample"
},
{
"path": "benchmarks/bench_lasso.py",
"chars": 3144,
"preview": "\"\"\"\nBenchmarks of Lasso vs LassoLars\n\nFirst, we fix a training set and increase the number of\nsamples. Then we plot the "
},
{
"path": "benchmarks/bench_lof.py",
"chars": 3491,
"preview": "\"\"\"\n============================\nLocalOutlierFactor benchmark\n============================\n\nA test of LocalOutlierFactor"
},
{
"path": "benchmarks/bench_mnist.py",
"chars": 7032,
"preview": "\"\"\"\n=======================\nMNIST dataset benchmark\n=======================\n\nBenchmark on the MNIST dataset. The datase"
},
{
"path": "benchmarks/bench_multilabel_metrics.py",
"chars": 6796,
"preview": "#!/usr/bin/env python\n\"\"\"\nA comparison of multilabel target formats and metrics over them\n\"\"\"\n\nfrom timeit import timeit"
},
{
"path": "benchmarks/bench_online_ocsvm.py",
"chars": 9422,
"preview": "\"\"\"\n=====================================\nSGDOneClassSVM benchmark\n=====================================\nThis benchmark "
},
{
"path": "benchmarks/bench_plot_fastkmeans.py",
"chars": 4420,
"preview": "from collections import defaultdict\nfrom time import time\n\nimport numpy as np\nfrom numpy import random as nr\n\nfrom sklea"
},
{
"path": "benchmarks/bench_plot_hierarchical.py",
"chars": 2556,
"preview": "from collections import defaultdict\nfrom time import time\n\nimport numpy as np\nfrom numpy import random as nr\n\nfrom sklea"
},
{
"path": "benchmarks/bench_plot_incremental_pca.py",
"chars": 5559,
"preview": "\"\"\"\n========================\nIncrementalPCA benchmark\n========================\n\nBenchmarks for IncrementalPCA\n\n\"\"\"\n\nimpo"
},
{
"path": "benchmarks/bench_plot_lasso_path.py",
"chars": 3942,
"preview": "\"\"\"Benchmarks of Lasso regularization path computation using Lars and CD\n\nThe input data is mostly low rank but is a fat"
},
{
"path": "benchmarks/bench_plot_neighbors.py",
"chars": 5721,
"preview": "\"\"\"\nPlot the scaling of the nearest neighbors algorithms with k, D, and N\n\"\"\"\nfrom time import time\n\nimport numpy as np\n"
},
{
"path": "benchmarks/bench_plot_nmf.py",
"chars": 15607,
"preview": "\"\"\"\nBenchmarks of Non-Negative Matrix Factorization\n\"\"\"\n# Authors: Tom Dupre la Tour (benchmark)\n# Chih-Jen Lin"
},
{
"path": "benchmarks/bench_plot_omp_lars.py",
"chars": 4413,
"preview": "\"\"\"Benchmarks of orthogonal matching pursuit (:ref:`OMP`) versus least angle\nregression (:ref:`least_angle_regression`)\n"
},
{
"path": "benchmarks/bench_plot_parallel_pairwise.py",
"chars": 1272,
"preview": "# Author: Mathieu Blondel <mathieu@mblondel.org>\n# License: BSD 3 clause\nimport time\n\nimport matplotlib.pyplot as plt\n\nf"
},
{
"path": "benchmarks/bench_plot_polynomial_kernel_approximation.py",
"chars": 5995,
"preview": "\"\"\"\n========================================================================\nBenchmark for explicit feature map approxim"
},
{
"path": "benchmarks/bench_plot_randomized_svd.py",
"chars": 17940,
"preview": "\"\"\"\nBenchmarks on the power iterations phase in randomized SVD.\n\nWe test on various synthetic and real datasets the effe"
},
{
"path": "benchmarks/bench_plot_svd.py",
"chars": 2749,
"preview": "\"\"\"Benchmarks of Singular Value Decomposition (Exact and Approximate)\n\nThe data is mostly low rank but is a fat infinite"
},
{
"path": "benchmarks/bench_plot_ward.py",
"chars": 1270,
"preview": "\"\"\"\nBenchmark scikit-learn's Ward implement compared to SciPy's\n\"\"\"\n\nimport time\n\nimport numpy as np\nfrom scipy.cluster "
},
{
"path": "benchmarks/bench_random_projections.py",
"chars": 8572,
"preview": "\"\"\"\n===========================\nRandom projection benchmark\n===========================\n\nBenchmarks for random projectio"
},
{
"path": "benchmarks/bench_rcv1_logreg_convergence.py",
"chars": 7808,
"preview": "# Authors: Tom Dupre la Tour <tom.dupre-la-tour@m4x.org>\n# Olivier Grisel <olivier.grisel@ensta.org>\n#\n# Licens"
},
{
"path": "benchmarks/bench_saga.py",
"chars": 10887,
"preview": "\"\"\"Author: Arthur Mensch, Nelle Varoquaux\n\nBenchmarks of sklearn SAGA vs lightning SAGA vs Liblinear. Shows the gain\nin "
},
{
"path": "benchmarks/bench_sample_without_replacement.py",
"chars": 7311,
"preview": "\"\"\"\nBenchmarks for sampling without replacement of integer.\n\n\"\"\"\nimport gc\nimport sys\nimport optparse\nfrom datetime impo"
},
{
"path": "benchmarks/bench_sgd_regression.py",
"chars": 5340,
"preview": "# Author: Peter Prettenhofer <peter.prettenhofer@gmail.com>\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotli"
},
{
"path": "benchmarks/bench_sparsify.py",
"chars": 3360,
"preview": "\"\"\"\nBenchmark SGD prediction time with dense/sparse coefficients.\n\nInvoke with\n-----------\n\n$ kernprof.py -l sparsity_be"
},
{
"path": "benchmarks/bench_text_vectorizers.py",
"chars": 1920,
"preview": "\"\"\"\n\nTo run this benchmark, you will need,\n\n * scikit-learn\n * pandas\n * memory_profiler\n * psutil (optional, but recomm"
},
{
"path": "benchmarks/bench_tree.py",
"chars": 3625,
"preview": "\"\"\"\nTo run this, you'll need to have installed.\n\n * scikit-learn\n\nDoes two benchmarks\n\nFirst, we fix a training set, in"
},
{
"path": "benchmarks/bench_tsne_mnist.py",
"chars": 6385,
"preview": "\"\"\"\n=============================\nMNIST dataset T-SNE benchmark\n=============================\n\n\"\"\"\n\n# License: BSD 3 cla"
},
{
"path": "benchmarks/plot_tsne_mnist.py",
"chars": 858,
"preview": "import matplotlib.pyplot as plt\nimport numpy as np\nimport os.path as op\n\nimport argparse\n\n\nLOG_DIR = \"mnist_tsne_output\""
},
{
"path": "build_tools/Makefile",
"chars": 77,
"preview": "# Makefile for maintenance tools\n\nauthors:\n\tpython generate_authors_table.py\n"
},
{
"path": "build_tools/azure/install.sh",
"chars": 7221,
"preview": "#!/bin/bash\n\nset -e\nset -x\n\nUNAMESTR=`uname`\n\nif [[ \"$DISTRIB\" == \"conda-mamba-pypy3\" ]]; then\n # condaforge/mambafor"
},
{
"path": "build_tools/azure/install_win.sh",
"chars": 816,
"preview": "#!/bin/bash\n\nset -e\nset -x\n\nif [[ \"$PYTHON_ARCH\" == \"64\" ]]; then\n conda create -n $VIRTUALENV -q -y python=$PYTHON_V"
},
{
"path": "build_tools/azure/posix-docker.yml",
"chars": 2883,
"preview": "parameters:\n name: ''\n vmImage: ''\n matrix: []\n dependsOn: []\n condition: ne(variables['Build.Reason'], 'Schedule')"
},
{
"path": "build_tools/azure/posix.yml",
"chars": 3695,
"preview": "parameters:\n name: ''\n vmImage: ''\n matrix: []\n dependsOn: []\n condition: ''\n\njobs:\n- job: ${{ parameters.name }}\n "
},
{
"path": "build_tools/azure/test_docs.sh",
"chars": 272,
"preview": "#!/bin/bash\n\nset -e\n\nif [[ \"$DISTRIB\" =~ ^conda.* ]]; then\n source activate $VIRTUALENV\nelif [[ \"$DISTRIB\" == \"ubuntu"
},
{
"path": "build_tools/azure/test_docstring.sh",
"chars": 296,
"preview": "#!/bin/bash\n\nset -e\n\nif [[ \"$DISTRIB\" =~ ^conda.* ]]; then\n source activate $VIRTUALENV\nelif [[ \"$DISTRIB\" == \"ubuntu"
},
{
"path": "build_tools/azure/test_pytest_soft_dependency.sh",
"chars": 710,
"preview": "#!/bin/bash\n\nset -e\n\n# called when DISTRIB==\"conda\"\nsource activate $VIRTUALENV\nconda remove -y py pytest || pip uninsta"
},
{
"path": "build_tools/azure/test_script.sh",
"chars": 1930,
"preview": "#!/bin/bash\n\nset -e\n\nif [[ \"$DISTRIB\" =~ ^conda.* ]]; then\n source activate $VIRTUALENV\nelif [[ \"$DISTRIB\" == \"ubuntu"
},
{
"path": "build_tools/azure/upload_codecov.sh",
"chars": 439,
"preview": "#!/bin/bash\n\nset -e\n\n# called when COVERAGE==\"true\" and DISTRIB==\"conda\"\nexport PATH=$HOME/miniconda3/bin:$PATH\nsource a"
},
{
"path": "build_tools/azure/windows.yml",
"chars": 1660,
"preview": "\nparameters:\n name: ''\n vmImage: ''\n matrix: []\n dependsOn: []\n condition: ne(variables['Build.Reason'], 'Schedule'"
},
{
"path": "build_tools/circle/build_doc.sh",
"chars": 9472,
"preview": "#!/usr/bin/env bash\nset -x\nset -e\n\n# Decide what kind of documentation build to run, and run it.\n#\n# If the last commit "
},
{
"path": "build_tools/circle/build_test_arm.sh",
"chars": 2715,
"preview": "#!/bin/bash\n\nset -e\nset -x\n\nUNAMESTR=`uname`\nN_CORES=`nproc --all`\n\n\nsetup_ccache() {\n echo \"Setting up ccache\"\n m"
},
{
"path": "build_tools/circle/build_test_pypy.sh",
"chars": 1156,
"preview": "#!/usr/bin/env bash\nset -x\nset -e\n\n# System build tools\napt-get -yq update\napt-get -yq install wget bzip2 build-essentia"
},
{
"path": "build_tools/circle/checkout_merge_commit.sh",
"chars": 920,
"preview": "#!/bin/bash\n\n\n# Add `main` branch to the update list.\n# Otherwise CircleCI will give us a cached one.\nFETCH_REFS=\"+main:"
},
{
"path": "build_tools/circle/linting.sh",
"chars": 6943,
"preview": "#!/bin/bash\n\n# This script is used in CircleCI to check that PRs do not add obvious\n# flake8 violations. It relies on tw"
},
{
"path": "build_tools/circle/list_versions.py",
"chars": 3731,
"preview": "#!/usr/bin/env python3\n\n# List all available versions of the documentation\nimport json\nimport re\nimport sys\n\nfrom distut"
},
{
"path": "build_tools/circle/push_doc.sh",
"chars": 1611,
"preview": "#!/bin/bash\n# This script is meant to be called in the \"deploy\" step defined in\n# circle.yml. See https://circleci.com/d"
},
{
"path": "build_tools/codespell_ignore_words.txt",
"chars": 232,
"preview": "aggresive\naline\nba\nbasf\nboun\nbre\ncach\ncomplies\ncoo\ncopys\ndeine\ndidi\nfeld\nfo\nfpr\nfro\nfwe\ngool\nhart\nhist\nines\ninout\nist\nja"
},
{
"path": "build_tools/generate_authors_table.py",
"chars": 5459,
"preview": "\"\"\"\nThis script generates an html table of contributors, with names and avatars.\nThe list is generated from scikit-learn"
},
{
"path": "build_tools/github/Windows",
"chars": 417,
"preview": "# Get the Python version of the base image from a build argument\nARG PYTHON_VERSION\nFROM winamd64/python:$PYTHON_VERSION"
},
{
"path": "build_tools/github/build_minimal_windows_image.sh",
"chars": 944,
"preview": "#!/bin/bash\n\nset -e\nset -x\n\nPYTHON_VERSION=$1\nBITNESS=$2\n\nif [[ \"$BITNESS\" == \"32\" ]]; then\n # 32-bit architectures a"
},
{
"path": "build_tools/github/build_source.sh",
"chars": 391,
"preview": "#!/bin/bash\n\nset -e\nset -x\n\n# Move up two levels to create the virtual\n# environment outside of the source folder\ncd ../"
},
{
"path": "build_tools/github/build_wheels.sh",
"chars": 1352,
"preview": "#!/bin/bash\n\nset -e\nset -x\n\n# OpenMP is not present on macOS by default\nif [[ \"$RUNNER_OS\" == \"macOS\" ]]; then\n # Mak"
},
{
"path": "build_tools/github/check_build_trigger.sh",
"chars": 278,
"preview": "#!/bin/bash\n\nset -e\nset -x\n\nCOMMIT_MSG=$(git log --no-merges -1 --oneline)\n\n# The commit marker \"[cd build]\" will trigge"
},
{
"path": "build_tools/github/check_wheels.py",
"chars": 1324,
"preview": "\"\"\"Checks that dist/* contains the number of wheels built from the\n.github/workflows/wheels.yml config.\"\"\"\nimport yaml\nf"
},
{
"path": "build_tools/github/repair_windows_wheels.sh",
"chars": 354,
"preview": "#!/bin/bash\n\nset -e\nset -x\n\nWHEEL=$1\nDEST_DIR=$2\nBITNESS=$3\n\n# By default, the Windows wheels are not repaired.\n# In thi"
},
{
"path": "build_tools/github/test_source.sh",
"chars": 359,
"preview": "#!/bin/bash\n\nset -e\nset -x\n\ncd ../../\n\npython -m venv test_env\nsource test_env/bin/activate\n\npython -m pip install sciki"
},
{
"path": "build_tools/github/test_wheels.sh",
"chars": 436,
"preview": "#!/bin/bash\n\nset -e\nset -x\n\nif [[ \"$OSTYPE\" != \"linux-gnu\" ]]; then\n # The Linux test environment is run in a Docker "
},
{
"path": "build_tools/github/test_windows_wheels.sh",
"chars": 713,
"preview": "#!/bin/bash\n\nset -e\nset -x\n\nPYTHON_VERSION=$1\nBITNESS=$2\n\nif [[ \"$BITNESS\" == \"32\" ]]; then\n # 32-bit architectures u"
},
{
"path": "build_tools/github/upload_anaconda.sh",
"chars": 654,
"preview": "#!/bin/bash\n\nset -e\nset -x\n\nif [ \"$GITHUB_EVENT_NAME\" == \"schedule\" ]; then\n ANACONDA_ORG=\"scipy-wheels-nightly\"\n "
},
{
"path": "build_tools/github/vendor.py",
"chars": 5765,
"preview": "\"\"\"Embed vcomp140.dll, vcruntime140.dll and vcruntime140_1.dll.\n\nNote that vcruntime140_1.dll is only required (and avai"
},
{
"path": "build_tools/shared.sh",
"chars": 470,
"preview": "get_dep() {\n package=\"$1\"\n version=\"$2\"\n if [[ \"$version\" == \"none\" ]]; then\n # do not install with none"
},
{
"path": "build_tools/travis/after_success.sh",
"chars": 1402,
"preview": "#!/bin/bash\n\n# This script is meant to be called by the \"after_success\" step\n# defined in \".travis.yml\". In particular, "
},
{
"path": "build_tools/travis/install.sh",
"chars": 371,
"preview": "#!/bin/bash\n\n# This script is meant to be called by the \"install\" step\n# defined in the \".travis.yml\" file. In particula"
},
{
"path": "build_tools/travis/install_main.sh",
"chars": 1850,
"preview": "#!/bin/bash\n\n# Travis clone \"scikit-learn/scikit-learn\" repository into\n# a local repository. We use a cached directory "
},
{
"path": "build_tools/travis/install_wheels.sh",
"chars": 141,
"preview": "#!/bin/bash\n\npython -m pip install cibuildwheel || travis_terminate $?\npython -m cibuildwheel --output-dir wheelhouse ||"
},
{
"path": "build_tools/travis/script.sh",
"chars": 496,
"preview": "#!/bin/bash\n\n# This script is meant to be called by the \"script\" step defined\n# in the \".travis.yml\" file. While this st"
},
{
"path": "build_tools/travis/test_docs.sh",
"chars": 157,
"preview": "#!/bin/bash\n\nset -e\n\nif [[ $TRAVIS_CPU_ARCH != arm64 ]]; then\n # Faster run of the documentation tests\n PYTEST=\"py"
},
{
"path": "build_tools/travis/test_script.sh",
"chars": 944,
"preview": "#!/bin/bash\n\nset -e\n\npython --version\npython -c \"import numpy; print(f'numpy {numpy.__version__}')\"\npython -c \"import sc"
},
{
"path": "build_tools/travis/test_wheels.sh",
"chars": 375,
"preview": "#!/bin/bash\n\npip install --upgrade pip || travis_terminate $?\npip install pytest pytest-xdist || travis_terminate $?\n\n# "
},
{
"path": "conftest.py",
"chars": 388,
"preview": "# Even if empty this file is useful so that when running from the root folder\n# ./sklearn is added to sys.path by pytest"
},
{
"path": "doc/Makefile",
"chars": 4211,
"preview": "# Makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line.\nSPHINXOPTS = -j auto\nSPHI"
},
{
"path": "doc/README.md",
"chars": 253,
"preview": "# Documentation for scikit-learn\n\nThis directory contains the full manual and website as displayed at\nhttp://scikit-lear"
},
{
"path": "doc/about.rst",
"chars": 15512,
"preview": ".. _about:\n\nAbout us\n========\n\nHistory\n-------\n\nThis project was started in 2007 as a Google Summer of Code project by\nD"
},
{
"path": "doc/authors.rst",
"chars": 4209,
"preview": ".. raw :: html\n\n <!-- Generated by generate_authors_table.py -->\n <div class=\"sk-authors-container\">\n <style>\n "
},
{
"path": "doc/authors_emeritus.rst",
"chars": 574,
"preview": "- Mathieu Blondel\n- Matthieu Brucher\n- Lars Buitinck\n- David Cournapeau\n- Noel Dawe\n- Vincent Dubourg\n- Edouard Duchesna"
},
{
"path": "doc/binder/requirements.txt",
"chars": 255,
"preview": "# A binder requirement file is required by sphinx-gallery.\n# We don't really need one since our binder requirement file "
},
{
"path": "doc/common_pitfalls.rst",
"chars": 25003,
"preview": ".. Places parent toc into the sidebar\n\n:parenttoc: True\n\n.. include:: includes/big_toc_css.rst\n\n.. _common_pitfalls:\n\n=="
},
{
"path": "doc/communication_team.rst",
"chars": 545,
"preview": ".. raw :: html\n\n <!-- Generated by generate_authors_table.py -->\n <div class=\"sk-authors-container\">\n <style>\n "
},
{
"path": "doc/computing/computational_performance.rst",
"chars": 17322,
"preview": ".. Places parent toc into the sidebar\n\n:parenttoc: True\n\n.. _computational_performance:\n\n.. currentmodule:: sklearn\n\nCom"
},
{
"path": "doc/computing/parallelism.rst",
"chars": 8388,
"preview": ".. Places parent toc into the sidebar\n\n:parenttoc: True\n\nParallelism, resource management, and configuration\n==========="
},
{
"path": "doc/computing/scaling_strategies.rst",
"chars": 6368,
"preview": ".. Places parent toc into the sidebar\n\n:parenttoc: True\n\n.. _scaling_strategies:\n\nStrategies to scale computationally: b"
},
{
"path": "doc/computing.rst",
"chars": 313,
"preview": ".. Places parent toc into the sidebar\n\n:parenttoc: True\n\n============================\nComputing with scikit-learn\n======"
},
{
"path": "doc/conf.py",
"chars": 17544,
"preview": "# -*- coding: utf-8 -*-\n#\n# scikit-learn documentation build configuration file, created by\n# sphinx-quickstart on Fri J"
},
{
"path": "doc/conftest.py",
"chars": 5191,
"preview": "import os\nfrom os.path import exists\nfrom os.path import join\nfrom os import environ\nimport warnings\n\nfrom sklearn.utils"
},
{
"path": "doc/contents.rst",
"chars": 406,
"preview": ".. include:: includes/big_toc_css.rst\n.. include:: tune_toc.rst\n\n.. Places global toc into the sidebar\n\n:globalsidebarto"
},
{
"path": "doc/data_transforms.rst",
"chars": 1381,
"preview": ".. Places parent toc into the sidebar\n\n:parenttoc: True\n\n.. include:: includes/big_toc_css.rst\n\n.. _data-transforms:\n\nDa"
},
{
"path": "doc/datasets/loading_other_datasets.rst",
"chars": 10530,
"preview": ".. Places parent toc into the sidebar\n\n:parenttoc: True\n\n.. _loading_other_datasets:\n\nLoading other datasets\n==========="
},
{
"path": "doc/datasets/real_world.rst",
"chars": 917,
"preview": ".. Places parent toc into the sidebar\n\n:parenttoc: True\n\n.. _real_world_datasets:\n\nReal world datasets\n================="
},
{
"path": "doc/datasets/sample_generators.rst",
"chars": 4095,
"preview": ".. Places parent toc into the sidebar\n\n:parenttoc: True\n\n.. _sample_generators:\n\nGenerated datasets\n==================\n\n"
},
{
"path": "doc/datasets/toy_dataset.rst",
"chars": 1057,
"preview": ".. Places parent toc into the sidebar\n\n:parenttoc: True\n\n.. _toy_datasets:\n\nToy datasets\n============\n\n.. currentmodule:"
},
{
"path": "doc/datasets.rst",
"chars": 2657,
"preview": ".. Places parent toc into the sidebar\n\n:parenttoc: True\n\n.. include:: includes/big_toc_css.rst\n\n.. _datasets:\n\n========="
},
{
"path": "doc/developers/advanced_installation.rst",
"chars": 16978,
"preview": "\n.. _advanced-installation:\n\n.. include:: ../min_dependency_substitutions.rst\n\n========================================="
},
{
"path": "doc/developers/bug_triaging.rst",
"chars": 6273,
"preview": ".. _bug_triaging:\n\nBug triaging and issue curation\n===============================\n\nThe `issue tracker <https://github.c"
},
{
"path": "doc/developers/contributing.rst",
"chars": 56857,
"preview": ".. _contributing:\n\n============\nContributing\n============\n\n.. currentmodule:: sklearn\n\nThis project is a community effor"
},
{
"path": "doc/developers/develop.rst",
"chars": 32990,
"preview": ".. _develop:\n\n==================================\nDeveloping scikit-learn estimators\n==================================\n\n"
},
{
"path": "doc/developers/index.rst",
"chars": 350,
"preview": ".. Places parent toc into the sidebar\n\n:parenttoc: True\n\n.. _developers_guide:\n\n=================\nDeveloper's Guide\n===="
},
{
"path": "doc/developers/maintainer.rst",
"chars": 18041,
"preview": "Maintainer / core-developer information\n========================================\n\n\nReleasing\n---------\n\nThis section is "
},
{
"path": "doc/developers/performance.rst",
"chars": 17241,
"preview": ".. _performance-howto:\n\n=========================\nHow to optimize for speed\n=========================\n\nThe following giv"
},
{
"path": "doc/developers/plotting.rst",
"chars": 4372,
"preview": ".. _plotting_api:\n\n================================\nDeveloping with the Plotting API\n================================\n\nS"
},
{
"path": "doc/developers/tips.rst",
"chars": 14578,
"preview": ".. _developers-tips:\n\n===========================\nDevelopers' Tips and Tricks\n===========================\n\nProductivity "
},
{
"path": "doc/developers/utilities.rst",
"chars": 8863,
"preview": ".. _developers-utils:\n\n========================\nUtilities for Developers\n========================\n\nScikit-learn contains"
},
{
"path": "doc/faq.rst",
"chars": 21280,
"preview": ".. _faq:\n\n===========================\nFrequently Asked Questions\n===========================\n\n.. currentmodule:: sklearn"
},
{
"path": "doc/getting_started.rst",
"chars": 10277,
"preview": "Getting Started\n===============\n\nThe purpose of this guide is to illustrate some of the main features that\n``scikit-lear"
},
{
"path": "doc/glossary.rst",
"chars": 86061,
"preview": ".. currentmodule:: sklearn\n\n.. _glossary:\n\n=========================================\nGlossary of Common Terms and API El"
},
{
"path": "doc/governance.rst",
"chars": 8691,
"preview": ".. _governance:\n\n===========================================\nScikit-learn governance and decision-making\n==============="
},
{
"path": "doc/includes/big_toc_css.rst",
"chars": 730,
"preview": ".. \n File to ..include in a document with a big table of content, to give\n it 'style'\n\n.. raw:: html\n\n <style ty"
},
{
"path": "doc/includes/bigger_toc_css.rst",
"chars": 1035,
"preview": ".. \n File to ..include in a document with a very big table of content, to \n give it 'style'\n\n.. raw:: html\n\n <st"
},
{
"path": "doc/inspection.rst",
"chars": 1086,
"preview": ".. Places parent toc into the sidebar\n\n:parenttoc: True\n\n.. include:: includes/big_toc_css.rst\n\n.. _inspection:\n\nInspect"
},
{
"path": "doc/install.rst",
"chars": 14616,
"preview": ".. _installation-instructions:\n\n=======================\nInstalling scikit-learn\n=======================\n\nThere are diffe"
},
{
"path": "doc/make.bat",
"chars": 3308,
"preview": "@ECHO OFF\n\nREM Command file for Sphinx documentation\n\nset SPHINXBUILD=sphinx-build\nset BUILDDIR=_build\nset ALLSPHINXOPTS"
},
{
"path": "doc/model_selection.rst",
"chars": 321,
"preview": ".. Places parent toc into the sidebar\n\n:parenttoc: True\n\n.. include:: includes/big_toc_css.rst\n\n.. _model_selection:\n\nMo"
},
{
"path": "doc/modules/biclustering.rst",
"chars": 11890,
"preview": ".. _biclustering:\n\n============\nBiclustering\n============\n\nBiclustering can be performed with the module\n:mod:`sklearn.c"
},
{
"path": "doc/modules/calibration.rst",
"chars": 13617,
"preview": ".. _calibration:\n\n=======================\nProbability calibration\n=======================\n\n.. currentmodule:: sklearn.ca"
},
{
"path": "doc/modules/classes.rst",
"chars": 38021,
"preview": ".. _api_ref:\n\n=============\nAPI Reference\n=============\n\nThis is the class and function reference of scikit-learn. Pleas"
},
{
"path": "doc/modules/clustering.rst",
"chars": 83852,
"preview": ".. _clustering:\n\n==========\nClustering\n==========\n\n`Clustering <https://en.wikipedia.org/wiki/Cluster_analysis>`__ of\nun"
},
{
"path": "doc/modules/compose.rst",
"chars": 23330,
"preview": "\n.. _combining_estimators:\n\n==================================\nPipelines and composite estimators\n======================"
},
{
"path": "doc/modules/covariance.rst",
"chars": 14803,
"preview": ".. _covariance:\n\n===================================================\nCovariance estimation\n============================="
},
{
"path": "doc/modules/cross_decomposition.rst",
"chars": 8906,
"preview": ".. _cross_decomposition:\n\n===================\nCross decomposition\n===================\n\n.. currentmodule:: sklearn.cross_"
},
{
"path": "doc/modules/cross_validation.rst",
"chars": 39848,
"preview": "\n.. _cross_validation:\n\n===================================================\nCross-validation: evaluating estimator perfo"
},
{
"path": "doc/modules/decomposition.rst",
"chars": 45787,
"preview": ".. _decompositions:\n\n\n=================================================================\nDecomposing signals in component"
},
{
"path": "doc/modules/density.rst",
"chars": 7722,
"preview": ".. _density_estimation:\n\n==================\nDensity Estimation\n==================\n.. sectionauthor:: Jake Vanderplas <va"
},
{
"path": "doc/modules/ensemble.rst",
"chars": 65923,
"preview": ".. _ensemble:\n\n================\nEnsemble methods\n================\n\n.. currentmodule:: sklearn.ensemble\n\nThe goal of **en"
},
{
"path": "doc/modules/feature_extraction.rst",
"chars": 44272,
"preview": ".. _feature_extraction:\n\n==================\nFeature extraction\n==================\n\n.. currentmodule:: sklearn.feature_ex"
},
{
"path": "doc/modules/feature_selection.rst",
"chars": 13775,
"preview": ".. currentmodule:: sklearn.feature_selection\n\n.. _feature_selection:\n\n=================\nFeature selection\n=============="
},
{
"path": "doc/modules/gaussian_process.rst",
"chars": 31148,
"preview": "\n\n.. _gaussian_process:\n\n==================\nGaussian Processes\n==================\n\n.. currentmodule:: sklearn.gaussian_p"
},
{
"path": "doc/modules/grid_search.rst",
"chars": 34002,
"preview": "\n\n.. currentmodule:: sklearn.model_selection\n\n.. _grid_search:\n\n===========================================\nTuning the h"
},
{
"path": "doc/modules/impute.rst",
"chars": 13569,
"preview": ".. _impute:\n\n============================\nImputation of missing values\n============================\n\n.. currentmodule:: "
},
{
"path": "doc/modules/isotonic.rst",
"chars": 1306,
"preview": ".. _isotonic:\n\n===================\nIsotonic regression\n===================\n\n.. currentmodule:: sklearn.isotonic\n\nThe cla"
},
{
"path": "doc/modules/kernel_approximation.rst",
"chars": 11231,
"preview": ".. _kernel_approximation:\n\nKernel Approximation\n====================\n\nThis submodule contains functions that approximate"
},
{
"path": "doc/modules/kernel_ridge.rst",
"chars": 3225,
"preview": ".. _kernel_ridge:\n\n===========================\nKernel ridge regression\n===========================\n\n.. currentmodule:: s"
},
{
"path": "doc/modules/lda_qda.rst",
"chars": 12122,
"preview": ".. _lda_qda:\n\n==========================================\nLinear and Quadratic Discriminant Analysis\n===================="
},
{
"path": "doc/modules/learning_curve.rst",
"chars": 6824,
"preview": ".. _learning_curves:\n\n=====================================================\nValidation curves: plotting scores to evalua"
}
]
// ... and 1069 more files (download for full content)
About this extraction
This page contains the full source code of the norbusan/scikit-learn GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 1269 files (14.3 MB), approximately 3.8M tokens, and a symbol index with 9625 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — a free GitHub-repository-to-text converter for AI. Built by Nikandr Surkov.