Showing preview only (2,508K chars total). Download the full file or copy to clipboard to get everything.
Repository: alteryx/featuretools
Branch: main
Commit: 938a0f6ccb98
Files: 501
Total size: 2.3 MB
Directory structure:
gitextract_b07mgx0i/
├── .codecov.yml
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── blank_issue.md
│ │ ├── bug_report.md
│ │ ├── config.yml
│ │ ├── documentation_improvement.md
│ │ └── feature_request.md
│ ├── auto_assign.yml
│ └── workflows/
│ ├── auto_approve_dependency_PRs.yaml
│ ├── broken_link_check.yaml
│ ├── build_docs.yaml
│ ├── create_feedstock_pr.yaml
│ ├── install_test.yaml
│ ├── kickoff_evalml_unit_tests.yaml
│ ├── latest_dependency_checker.yaml
│ ├── lint_check.yaml
│ ├── minimum_dependency_checker.yaml
│ ├── performance-check.yaml
│ ├── pull_request_check.yaml
│ ├── release.yaml
│ ├── release_notes_updated.yaml
│ ├── test_without_test_dependencies.yaml
│ ├── tests_with_latest_deps.yaml
│ ├── tests_with_minimum_deps.yaml
│ └── tests_with_woodwork_main_branch.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yaml
├── LICENSE
├── Makefile
├── README.md
├── contributing.md
├── docs/
│ ├── Makefile
│ ├── backport_release.md
│ ├── make.bat
│ ├── notebook_version_standardizer.py
│ ├── pull_request_template.md
│ └── source/
│ ├── _static/
│ │ └── style.css
│ ├── api_reference.rst
│ ├── conf.py
│ ├── getting_started/
│ │ ├── afe.ipynb
│ │ ├── getting_started_index.rst
│ │ ├── handling_time.ipynb
│ │ ├── primitives.ipynb
│ │ ├── using_entitysets.ipynb
│ │ └── woodwork_types.ipynb
│ ├── guides/
│ │ ├── advanced_custom_primitives.ipynb
│ │ ├── deployment.ipynb
│ │ ├── feature_descriptions.ipynb
│ │ ├── feature_selection.ipynb
│ │ ├── guides_index.rst
│ │ ├── performance.ipynb
│ │ ├── specifying_primitive_options.ipynb
│ │ ├── sql_database_integration.ipynb
│ │ ├── time_series.ipynb
│ │ └── tuning_dfs.ipynb
│ ├── index.ipynb
│ ├── install.md
│ ├── release_notes.rst
│ ├── resources/
│ │ ├── ecosystem.rst
│ │ ├── frequently_asked_questions.ipynb
│ │ ├── help.rst
│ │ ├── resources_index.rst
│ │ ├── transition_to_ft_v1.0.ipynb
│ │ └── usage_tips/
│ │ ├── glossary.rst
│ │ └── limitations.rst
│ ├── set-headers.py
│ ├── setup.py
│ └── templates/
│ └── layout.html
├── featuretools/
│ ├── __init__.py
│ ├── __main__.py
│ ├── computational_backends/
│ │ ├── __init__.py
│ │ ├── api.py
│ │ ├── calculate_feature_matrix.py
│ │ ├── feature_set.py
│ │ ├── feature_set_calculator.py
│ │ └── utils.py
│ ├── config_init.py
│ ├── demo/
│ │ ├── __init__.py
│ │ ├── api.py
│ │ ├── flight.py
│ │ ├── mock_customer.py
│ │ ├── retail.py
│ │ └── weather.py
│ ├── entityset/
│ │ ├── __init__.py
│ │ ├── api.py
│ │ ├── deserialize.py
│ │ ├── entityset.py
│ │ ├── relationship.py
│ │ ├── serialize.py
│ │ └── timedelta.py
│ ├── exceptions.py
│ ├── feature_base/
│ │ ├── __init__.py
│ │ ├── api.py
│ │ ├── cache.py
│ │ ├── feature_base.py
│ │ ├── feature_descriptions.py
│ │ ├── feature_visualizer.py
│ │ ├── features_deserializer.py
│ │ ├── features_serializer.py
│ │ └── utils.py
│ ├── feature_discovery/
│ │ ├── FeatureCollection.py
│ │ ├── LiteFeature.py
│ │ ├── __init__.py
│ │ ├── convertors.py
│ │ ├── feature_discovery.py
│ │ ├── type_defs.py
│ │ └── utils.py
│ ├── primitives/
│ │ ├── __init__.py
│ │ ├── base/
│ │ │ ├── __init__.py
│ │ │ ├── aggregation_primitive_base.py
│ │ │ ├── primitive_base.py
│ │ │ └── transform_primitive_base.py
│ │ ├── options_utils.py
│ │ ├── standard/
│ │ │ ├── __init__.py
│ │ │ ├── aggregation/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── all_primitive.py
│ │ │ │ ├── any_primitive.py
│ │ │ │ ├── average_count_per_unique.py
│ │ │ │ ├── avg_time_between.py
│ │ │ │ ├── count.py
│ │ │ │ ├── count_above_mean.py
│ │ │ │ ├── count_below_mean.py
│ │ │ │ ├── count_greater_than.py
│ │ │ │ ├── count_inside_nth_std.py
│ │ │ │ ├── count_inside_range.py
│ │ │ │ ├── count_less_than.py
│ │ │ │ ├── count_outside_nth_std.py
│ │ │ │ ├── count_outside_range.py
│ │ │ │ ├── date_first_event.py
│ │ │ │ ├── entropy.py
│ │ │ │ ├── first.py
│ │ │ │ ├── first_last_time_delta.py
│ │ │ │ ├── has_no_duplicates.py
│ │ │ │ ├── is_monotonically_decreasing.py
│ │ │ │ ├── is_monotonically_increasing.py
│ │ │ │ ├── is_unique.py
│ │ │ │ ├── kurtosis.py
│ │ │ │ ├── last.py
│ │ │ │ ├── max_consecutive_false.py
│ │ │ │ ├── max_consecutive_negatives.py
│ │ │ │ ├── max_consecutive_positives.py
│ │ │ │ ├── max_consecutive_true.py
│ │ │ │ ├── max_consecutive_zeros.py
│ │ │ │ ├── max_count.py
│ │ │ │ ├── max_min_delta.py
│ │ │ │ ├── max_primitive.py
│ │ │ │ ├── mean.py
│ │ │ │ ├── median.py
│ │ │ │ ├── median_count.py
│ │ │ │ ├── min_count.py
│ │ │ │ ├── min_primitive.py
│ │ │ │ ├── mode.py
│ │ │ │ ├── n_most_common.py
│ │ │ │ ├── n_most_common_frequency.py
│ │ │ │ ├── n_unique_days.py
│ │ │ │ ├── n_unique_days_of_calendar_year.py
│ │ │ │ ├── n_unique_days_of_month.py
│ │ │ │ ├── n_unique_months.py
│ │ │ │ ├── n_unique_weeks.py
│ │ │ │ ├── num_consecutive_greater_mean.py
│ │ │ │ ├── num_consecutive_less_mean.py
│ │ │ │ ├── num_false_since_last_true.py
│ │ │ │ ├── num_peaks.py
│ │ │ │ ├── num_true.py
│ │ │ │ ├── num_true_since_last_false.py
│ │ │ │ ├── num_unique.py
│ │ │ │ ├── num_zero_crossings.py
│ │ │ │ ├── percent_true.py
│ │ │ │ ├── percent_unique.py
│ │ │ │ ├── skew.py
│ │ │ │ ├── std.py
│ │ │ │ ├── sum_primitive.py
│ │ │ │ ├── time_since_first.py
│ │ │ │ ├── time_since_last.py
│ │ │ │ ├── time_since_last_false.py
│ │ │ │ ├── time_since_last_max.py
│ │ │ │ ├── time_since_last_min.py
│ │ │ │ ├── time_since_last_true.py
│ │ │ │ ├── trend.py
│ │ │ │ └── variance.py
│ │ │ └── transform/
│ │ │ ├── __init__.py
│ │ │ ├── absolute_diff.py
│ │ │ ├── binary/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── add_numeric.py
│ │ │ │ ├── add_numeric_scalar.py
│ │ │ │ ├── and_primitive.py
│ │ │ │ ├── divide_by_feature.py
│ │ │ │ ├── divide_numeric.py
│ │ │ │ ├── divide_numeric_scalar.py
│ │ │ │ ├── equal.py
│ │ │ │ ├── equal_scalar.py
│ │ │ │ ├── greater_than.py
│ │ │ │ ├── greater_than_equal_to.py
│ │ │ │ ├── greater_than_equal_to_scalar.py
│ │ │ │ ├── greater_than_scalar.py
│ │ │ │ ├── less_than.py
│ │ │ │ ├── less_than_equal_to.py
│ │ │ │ ├── less_than_equal_to_scalar.py
│ │ │ │ ├── less_than_scalar.py
│ │ │ │ ├── modulo_by_feature.py
│ │ │ │ ├── modulo_numeric.py
│ │ │ │ ├── modulo_numeric_scalar.py
│ │ │ │ ├── multiply_boolean.py
│ │ │ │ ├── multiply_numeric.py
│ │ │ │ ├── multiply_numeric_boolean.py
│ │ │ │ ├── multiply_numeric_scalar.py
│ │ │ │ ├── not_equal.py
│ │ │ │ ├── not_equal_scalar.py
│ │ │ │ ├── or_primitive.py
│ │ │ │ ├── scalar_subtract_numeric_feature.py
│ │ │ │ ├── subtract_numeric.py
│ │ │ │ └── subtract_numeric_scalar.py
│ │ │ ├── cumulative/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── cum_count.py
│ │ │ │ ├── cum_max.py
│ │ │ │ ├── cum_mean.py
│ │ │ │ ├── cum_min.py
│ │ │ │ ├── cum_sum.py
│ │ │ │ ├── cumulative_time_since_last_false.py
│ │ │ │ └── cumulative_time_since_last_true.py
│ │ │ ├── datetime/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── age.py
│ │ │ │ ├── date_to_holiday.py
│ │ │ │ ├── date_to_timezone.py
│ │ │ │ ├── day.py
│ │ │ │ ├── day_of_year.py
│ │ │ │ ├── days_in_month.py
│ │ │ │ ├── diff_datetime.py
│ │ │ │ ├── distance_to_holiday.py
│ │ │ │ ├── hour.py
│ │ │ │ ├── is_federal_holiday.py
│ │ │ │ ├── is_first_week_of_month.py
│ │ │ │ ├── is_leap_year.py
│ │ │ │ ├── is_lunch_time.py
│ │ │ │ ├── is_month_end.py
│ │ │ │ ├── is_month_start.py
│ │ │ │ ├── is_quarter_end.py
│ │ │ │ ├── is_quarter_start.py
│ │ │ │ ├── is_weekend.py
│ │ │ │ ├── is_working_hours.py
│ │ │ │ ├── is_year_end.py
│ │ │ │ ├── is_year_start.py
│ │ │ │ ├── minute.py
│ │ │ │ ├── month.py
│ │ │ │ ├── part_of_day.py
│ │ │ │ ├── quarter.py
│ │ │ │ ├── season.py
│ │ │ │ ├── second.py
│ │ │ │ ├── time_since.py
│ │ │ │ ├── time_since_previous.py
│ │ │ │ ├── utils.py
│ │ │ │ ├── week.py
│ │ │ │ ├── weekday.py
│ │ │ │ └── year.py
│ │ │ ├── email/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── email_address_to_domain.py
│ │ │ │ └── is_free_email_domain.py
│ │ │ ├── exponential/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── exponential_weighted_average.py
│ │ │ │ ├── exponential_weighted_std.py
│ │ │ │ └── exponential_weighted_variance.py
│ │ │ ├── file_extension.py
│ │ │ ├── full_name_to_first_name.py
│ │ │ ├── full_name_to_last_name.py
│ │ │ ├── full_name_to_title.py
│ │ │ ├── is_in.py
│ │ │ ├── is_null.py
│ │ │ ├── latlong/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── cityblock_distance.py
│ │ │ │ ├── geomidpoint.py
│ │ │ │ ├── haversine.py
│ │ │ │ ├── is_in_geobox.py
│ │ │ │ ├── latitude.py
│ │ │ │ ├── longitude.py
│ │ │ │ └── utils.py
│ │ │ ├── natural_language/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── constants.py
│ │ │ │ ├── count_string.py
│ │ │ │ ├── mean_characters_per_word.py
│ │ │ │ ├── median_word_length.py
│ │ │ │ ├── num_characters.py
│ │ │ │ ├── num_unique_separators.py
│ │ │ │ ├── num_words.py
│ │ │ │ ├── number_of_common_words.py
│ │ │ │ ├── number_of_hashtags.py
│ │ │ │ ├── number_of_mentions.py
│ │ │ │ ├── number_of_unique_words.py
│ │ │ │ ├── number_of_words_in_quotes.py
│ │ │ │ ├── punctuation_count.py
│ │ │ │ ├── title_word_count.py
│ │ │ │ ├── total_word_length.py
│ │ │ │ ├── upper_case_count.py
│ │ │ │ ├── upper_case_word_count.py
│ │ │ │ └── whitespace_count.py
│ │ │ ├── not_primitive.py
│ │ │ ├── nth_week_of_month.py
│ │ │ ├── numeric/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── absolute.py
│ │ │ │ ├── cosine.py
│ │ │ │ ├── diff.py
│ │ │ │ ├── natural_logarithm.py
│ │ │ │ ├── negate.py
│ │ │ │ ├── percentile.py
│ │ │ │ ├── rate_of_change.py
│ │ │ │ ├── same_as_previous.py
│ │ │ │ ├── sine.py
│ │ │ │ ├── square_root.py
│ │ │ │ └── tangent.py
│ │ │ ├── percent_change.py
│ │ │ ├── postal/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── one_digit_postal_code.py
│ │ │ │ └── two_digit_postal_code.py
│ │ │ ├── savgol_filter.py
│ │ │ ├── time_series/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── expanding/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── expanding_count.py
│ │ │ │ │ ├── expanding_max.py
│ │ │ │ │ ├── expanding_mean.py
│ │ │ │ │ ├── expanding_min.py
│ │ │ │ │ ├── expanding_std.py
│ │ │ │ │ └── expanding_trend.py
│ │ │ │ ├── lag.py
│ │ │ │ ├── numeric_lag.py
│ │ │ │ ├── rolling_count.py
│ │ │ │ ├── rolling_max.py
│ │ │ │ ├── rolling_mean.py
│ │ │ │ ├── rolling_min.py
│ │ │ │ ├── rolling_outlier_count.py
│ │ │ │ ├── rolling_std.py
│ │ │ │ ├── rolling_trend.py
│ │ │ │ └── utils.py
│ │ │ └── url/
│ │ │ ├── __init__.py
│ │ │ ├── url_to_domain.py
│ │ │ ├── url_to_protocol.py
│ │ │ └── url_to_tld.py
│ │ └── utils.py
│ ├── selection/
│ │ ├── __init__.py
│ │ ├── api.py
│ │ └── selection.py
│ ├── synthesis/
│ │ ├── __init__.py
│ │ ├── api.py
│ │ ├── deep_feature_synthesis.py
│ │ ├── dfs.py
│ │ ├── encode_features.py
│ │ ├── get_valid_primitives.py
│ │ └── utils.py
│ ├── tests/
│ │ ├── __init__.py
│ │ ├── computational_backend/
│ │ │ ├── __init__.py
│ │ │ ├── test_calculate_feature_matrix.py
│ │ │ ├── test_feature_set.py
│ │ │ ├── test_feature_set_calculator.py
│ │ │ └── test_utils.py
│ │ ├── config_tests/
│ │ │ ├── __init__.py
│ │ │ └── test_config.py
│ │ ├── conftest.py
│ │ ├── demo_tests/
│ │ │ ├── __init__.py
│ │ │ └── test_demo_data.py
│ │ ├── entityset_tests/
│ │ │ ├── __init__.py
│ │ │ ├── test_es.py
│ │ │ ├── test_es_metadata.py
│ │ │ ├── test_last_time_index.py
│ │ │ ├── test_plotting.py
│ │ │ ├── test_relationship.py
│ │ │ ├── test_serialization.py
│ │ │ ├── test_timedelta.py
│ │ │ └── test_ww_es.py
│ │ ├── entry_point_tests/
│ │ │ ├── __init__.py
│ │ │ ├── add-ons/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── featuretools_plugin/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── featuretools_plugin/
│ │ │ │ │ │ └── __init__.py
│ │ │ │ │ └── setup.py
│ │ │ │ └── featuretools_primitives/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── featuretools_primitives/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── existing_primitive.py
│ │ │ │ │ ├── invalid_primitive.py
│ │ │ │ │ └── new_primitive.py
│ │ │ │ └── setup.py
│ │ │ ├── test_plugin.py
│ │ │ ├── test_primitives.py
│ │ │ └── utils.py
│ │ ├── feature_discovery/
│ │ │ ├── __init__.py
│ │ │ ├── test_convertors.py
│ │ │ ├── test_feature_collection.py
│ │ │ ├── test_feature_discovery.py
│ │ │ └── test_type_defs.py
│ │ ├── primitive_tests/
│ │ │ ├── __init__.py
│ │ │ ├── aggregation_primitive_tests/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── test_agg_primitives.py
│ │ │ │ ├── test_count_aggregation_primitives.py
│ │ │ │ ├── test_max_consecutive.py
│ │ │ │ ├── test_num_consecutive.py
│ │ │ │ ├── test_percent_true.py
│ │ │ │ ├── test_rolling_primitive.py
│ │ │ │ └── test_time_since.py
│ │ │ ├── bad_primitive_files/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── multiple_primitives.py
│ │ │ │ └── no_primitives.py
│ │ │ ├── natural_language_primitives_tests/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── test_count_string.py
│ │ │ │ ├── test_mean_characters_per_word.py
│ │ │ │ ├── test_median_word_length.py
│ │ │ │ ├── test_natural_language_primitives_terminate.py
│ │ │ │ ├── test_num_characters.py
│ │ │ │ ├── test_num_unique_separators.py
│ │ │ │ ├── test_num_words.py
│ │ │ │ ├── test_number_of_common_words.py
│ │ │ │ ├── test_number_of_hashtags.py
│ │ │ │ ├── test_number_of_mentions.py
│ │ │ │ ├── test_number_of_unique_words.py
│ │ │ │ ├── test_number_of_words_in_quotes.py
│ │ │ │ ├── test_punctuation_count.py
│ │ │ │ ├── test_title_word_count.py
│ │ │ │ ├── test_total_word_length.py
│ │ │ │ ├── test_upper_case_count.py
│ │ │ │ ├── test_upper_case_word_count.py
│ │ │ │ └── test_whitespace_count.py
│ │ │ ├── primitives_to_install/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── custom_max.py
│ │ │ │ ├── custom_mean.py
│ │ │ │ └── custom_sum.py
│ │ │ ├── test_absolute_diff.py
│ │ │ ├── test_agg_feats.py
│ │ │ ├── test_all_primitive_docstrings.py
│ │ │ ├── test_direct_features.py
│ │ │ ├── test_feature_base.py
│ │ │ ├── test_feature_descriptions.py
│ │ │ ├── test_feature_serialization.py
│ │ │ ├── test_feature_utils.py
│ │ │ ├── test_feature_visualizer.py
│ │ │ ├── test_features_deserializer.py
│ │ │ ├── test_features_serializer.py
│ │ │ ├── test_groupby_transform_primitives.py
│ │ │ ├── test_identity_features.py
│ │ │ ├── test_overrides.py
│ │ │ ├── test_primitive_base.py
│ │ │ ├── test_primitive_utils.py
│ │ │ ├── test_rolling_primitive_utils.py
│ │ │ ├── test_transform_features.py
│ │ │ ├── transform_primitive_tests/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── test_cumulative_time_since.py
│ │ │ │ ├── test_datetoholiday_primitive.py
│ │ │ │ ├── test_distancetoholiday_primitive.py
│ │ │ │ ├── test_expanding_primitives.py
│ │ │ │ ├── test_exponential_primitives.py
│ │ │ │ ├── test_full_name_primitives.py
│ │ │ │ ├── test_is_federal_holiday.py
│ │ │ │ ├── test_latlong_primitives.py
│ │ │ │ ├── test_percent_change.py
│ │ │ │ ├── test_percent_unique.py
│ │ │ │ ├── test_postal_primitives.py
│ │ │ │ ├── test_same_as_previous.py
│ │ │ │ ├── test_savgol_filter.py
│ │ │ │ ├── test_season.py
│ │ │ │ └── test_transform_primitive.py
│ │ │ └── utils.py
│ │ ├── profiling/
│ │ │ ├── __init__.py
│ │ │ └── dfs_profile.py
│ │ ├── requirement_files/
│ │ │ ├── latest_requirements.txt
│ │ │ ├── minimum_core_requirements.txt
│ │ │ ├── minimum_dask_requirements.txt
│ │ │ └── minimum_test_requirements.txt
│ │ ├── selection/
│ │ │ ├── __init__.py
│ │ │ └── test_selection.py
│ │ ├── synthesis/
│ │ │ ├── __init__.py
│ │ │ ├── test_deep_feature_synthesis.py
│ │ │ ├── test_dfs_method.py
│ │ │ ├── test_encode_features.py
│ │ │ └── test_get_valid_primitives.py
│ │ ├── test_version.py
│ │ ├── testing_utils/
│ │ │ ├── __init__.py
│ │ │ ├── cluster.py
│ │ │ ├── es_utils.py
│ │ │ ├── features.py
│ │ │ ├── generate_fake_dataframe.py
│ │ │ └── mock_ds.py
│ │ └── utils_tests/
│ │ ├── __init__.py
│ │ ├── test_config.py
│ │ ├── test_description_utils.py
│ │ ├── test_entry_point.py
│ │ ├── test_gen_utils.py
│ │ ├── test_recommend_primitives.py
│ │ ├── test_time_utils.py
│ │ ├── test_trie.py
│ │ └── test_utils_info.py
│ ├── utils/
│ │ ├── __init__.py
│ │ ├── api.py
│ │ ├── common_tld_utils.py
│ │ ├── description_utils.py
│ │ ├── entry_point.py
│ │ ├── gen_utils.py
│ │ ├── plot_utils.py
│ │ ├── recommend_primitives.py
│ │ ├── s3_utils.py
│ │ ├── schema_utils.py
│ │ ├── time_utils.py
│ │ ├── trie.py
│ │ ├── utils_info.py
│ │ └── wrangle.py
│ └── version.py
├── pyproject.toml
└── release.md
================================================
FILE CONTENTS
================================================
================================================
FILE: .codecov.yml
================================================
codecov:
notify:
after_n_builds: 5
================================================
FILE: .github/ISSUE_TEMPLATE/blank_issue.md
================================================
---
name: Blank Issue
about: Create a blank issue
title: ''
labels: ''
assignees: ''
---
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug Report
about: Create a bug report to help us improve Featuretools
title: ''
labels: 'bug'
assignees: ''
---
[A clear and concise description of what the bug is.]
#### Code Sample, a copy-pastable example to reproduce your bug.
```python
# Your code here
```
#### Output of ``featuretools.show_info()``
<details>
[paste the output of ``featuretools.show_info()`` here below this line]
</details>
================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: true
contact_links:
- name: General Technical Question
about: "If you have a question like *How should I create my EntitySet?* you can ask on StackOverflow using the #featuretools tag."
url: https://stackoverflow.com/questions/tagged/featuretools
- name: Real-time chat
url: https://join.slack.com/t/alteryx-oss/shared_invite/zt-182tyvuxv-NzIn6eiCEf8TBziuKp0bNA
about: "If you want to meet others in the community and chat about all things Alteryx OSS then check out our Slack."
================================================
FILE: .github/ISSUE_TEMPLATE/documentation_improvement.md
================================================
---
name: Documentation Improvement
about: Suggest an idea for improving the documentation
title: ''
labels: 'documentation'
assignees: ''
---
[A clear description of the documentation you believe needs to be fixed or improved.]
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature Request
about: Suggest an idea for this project
title: ''
labels: 'new feature'
assignees: ''
---
- As a [user/developer], I wish I could use Featuretools to ...
#### Code Example
```python
# Your code here, if applicable
```
================================================
FILE: .github/auto_assign.yml
================================================
# Set to `author` to assign the PR creator as the assignee
addAssignees: author
================================================
FILE: .github/workflows/auto_approve_dependency_PRs.yaml
================================================
name: Auto Approve Dependency PRs
on:
schedule:
- cron: '*/30 * * * *'
workflow_dispatch:
workflow_run:
workflows: ["Unit Tests - Latest Dependencies", "Unit Tests - 3.9 Minimum Dependencies"]
branches:
- 'latest-dep-update-[a-f0-9]+'
- 'min-dep-update-[a-f0-9]+'
types:
- completed
jobs:
build:
if: ${{ github.repository_owner == 'alteryx' }}
runs-on: ubuntu-latest
steps:
- name: Find dependency PRs
id: find_prs
run: |
gh auth status
gh pr list --repo "${{ github.repository }}" --assignee "machineFL" --base main --state open --search "status:success review:required" --limit 1 --json number > dep_PRs_waiting_approval.json
dep_pull_request=$(cat dep_PRs_waiting_approval.json | grep -Eo "[0-9]*")
echo ::set-output name=dep_pull_request::${dep_pull_request}
env:
GITHUB_TOKEN: ${{ secrets.AUTO_APPROVE_TOKEN }}
- name: Approve dependency PRs and enable auto-merge
if: ${{ steps.find_prs.outputs.dep_pull_request > 1 }}
run: |
gh pr review --repo "${{ github.repository }}" --comment --body "auto approve" ${{ steps.find_prs.outputs.dep_pull_request }}
gh pr review --repo "${{ github.repository }}" --approve ${{ steps.find_prs.outputs.dep_pull_request }}
gh pr merge --repo "${{ github.repository }}" --auto --squash --delete-branch ${{ steps.find_prs.outputs.dep_pull_request }}
env:
GITHUB_TOKEN: ${{ secrets.AUTO_APPROVE_TOKEN }}
================================================
FILE: .github/workflows/broken_link_check.yaml
================================================
name: Broken link check
on:
workflow_dispatch:
schedule:
- cron: "* * * * 1"
jobs:
my-broken-link-checker:
name: Check for broken links
runs-on: ubuntu-latest
strategy:
fail-fast: false
steps:
- name: Check for broken links
uses: ruzickap/action-my-broken-link-checker@v2
with:
url: https://featuretools.alteryx.com/en/latest/
cmd_params: '--max-connections=10 --color=always --ignore-fragments --buffer-size=8192 --skip-tls-verification --exclude="(twitter|github|cloudflare|featuretools\\.alteryx\\.com\\/en\\/(stable|main|v.+).*)"'
- name: Add to job output
run: echo "${{steps.link-report.outputs.result}}" >> $GITHUB_STEP_SUMMARY
================================================
FILE: .github/workflows/build_docs.yaml
================================================
name: Build Docs
on:
pull_request:
types: [opened, synchronize]
push:
branches:
- main
workflow_dispatch:
env:
PYARROW_IGNORE_TIMEZONE: 1
JAVA_HOME: "/usr/lib/jvm/java-11-openjdk-amd64"
jobs:
build_docs:
name: ${{ matrix.python_version }} build docs
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python_version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
- name: Set up python ${{ matrix.python_version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python_version }}
cache: 'pip'
cache-dependency-path: 'pyproject.toml'
- uses: actions/cache@v3
id: cache
with:
path: ${{ env.pythonLocation }}
key: ${{ matrix.python_version }}-docs-${{ env.pythonLocation }}-${{ hashFiles('**/pyproject.toml') }}-v01
- name: Build featuretools package
run: |
make package
- name: Install complete version of featuretools from sdist (not using cache)
if: steps.cache.outputs.cache-hit != 'true'
run: |
python -m pip install "unpacked_sdist/[dev]"
- name: Install complete version of featuretools from sdist (using cache)
if: steps.cache.outputs.cache-hit == 'true'
run: |
python -m pip install "unpacked_sdist/[dev]" --no-deps
- name: Install apt packages
run: |
sudo apt update
sudo apt install -y pandoc
sudo apt install -y graphviz
python -m pip check
- name: Build docs
run: make -C docs/ -e "SPHINXOPTS=-W -j auto" clean html
================================================
FILE: .github/workflows/create_feedstock_pr.yaml
================================================
on:
workflow_dispatch:
inputs:
version:
description: 'released PyPI version to use (ex - v1.11.1)'
required: true
name: Create Feedstock PR
jobs:
create_feedstock_pr:
name: Create Feedstock PR
runs-on: ubuntu-latest
steps:
- name: Checkout inputted version
uses: actions/checkout@v3
with:
repository: ${{ github.event.pull_request.head.repo.full_name }}
ref: ${{ github.event.inputs.version }}
path: "./featuretools"
- name: Pull latest from upstream for user forked feedstock
run: |
gh auth status
gh repo sync alteryx/featuretools-feedstock --branch main --source conda-forge/featuretools-feedstock --force
env:
GITHUB_TOKEN: ${{ secrets.AUTO_APPROVE_TOKEN }}
- uses: actions/checkout@v3
with:
repository: alteryx/featuretools-feedstock
ref: main
path: "./featuretools-feedstock"
fetch-depth: '0'
- name: Run Create Feedstock meta YAML
id: create-feedstock-meta
uses: alteryx/create-feedstock-meta-yaml@v4
with:
project: "featuretools"
pypi_version: ${{ github.event.inputs.version }}
project_metadata_filepath: "featuretools/pyproject.toml"
meta_yaml_filepath: "featuretools-feedstock/recipe/meta.yaml"
add_to_test_requirements: "graphviz !=2.47.2"
- name: View updated meta yaml
run: cat featuretools-feedstock/recipe/meta.yaml
- name: Push updated yaml
run: |
cd featuretools-feedstock
git config --unset-all http.https://github.com/.extraheader
git config --global user.email "machineOSS@alteryx.com"
git config --global user.name "machineAYX Bot"
git remote set-url origin https://${{ secrets.AUTO_APPROVE_TOKEN }}@github.com/alteryx/featuretools-feedstock
git checkout -b ${{ github.event.inputs.version }}
git add recipe/meta.yaml
git commit -m "${{ github.event.inputs.version }}"
git push origin ${{ github.event.inputs.version }}
- name: Adding URL to job output
run: |
echo "Conda Feedstock Pull Request: https://github.com/alteryx/featuretools-feedstock/pull/new/${{ github.event.inputs.version }}" >> $GITHUB_STEP_SUMMARY
================================================
FILE: .github/workflows/install_test.yaml
================================================
name: Install Test
on:
pull_request:
types: [opened, synchronize]
push:
branches:
- main
env:
ALTERYX_OPEN_SRC_UPDATE_CHECKER: False
jobs:
install_ft_complete:
name: ${{ matrix.os }} - ${{ matrix.python_version }} install featuretools complete
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python_version: ["3.9", "3.10", "3.11", "3.12"]
runs-on: ${{ matrix.os }}
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
- name: Set up python ${{ matrix.python_version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python_version }}
cache: 'pip'
cache-dependency-path: 'pyproject.toml'
- name: Build featuretools package
run: |
make package
- name: Install complete version of featuretools from sdist
run: |
python -m pip install "unpacked_sdist/[complete]"
- name: Test by importing packages
run: |
python -c "import premium_primitives"
python -c "from nlp_primitives import PolarityScore"
- name: Check package conflicts
run: |
python -m pip check
- name: Verify extra_requires commands
run: |
python -m pip install "unpacked_sdist/[nlp]"
================================================
FILE: .github/workflows/kickoff_evalml_unit_tests.yaml
================================================
name: Kickoff EvalML Unit Tests
on:
push:
branches:
- main
workflow_dispatch:
jobs:
kickoff:
name: Run EvalML unit tests
if: github.repository_owner == 'alteryx'
runs-on: ubuntu-latest
steps:
- name: Run workflow for EvalML unit tests
run: gh workflow run unit_tests_with_featuretools_main_branch.yaml --repo "alteryx/evalml"
env:
GITHUB_TOKEN: ${{ secrets.REPO_SCOPED_TOKEN }}
================================================
FILE: .github/workflows/latest_dependency_checker.yaml
================================================
# This workflow installs dependencies and, if any critical dependencies have changed, creates a pull request,
# which triggers a CI run with the new dependencies.
name: Latest Dependency Checker
on:
schedule:
- cron: '0 * * * *'
workflow_dispatch:
jobs:
build:
if: ${{ github.repository_owner == 'alteryx' }}
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
- uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Update dependencies
run: |
python -m pip install --upgrade pip
python -m pip install -e ".[dask,test]"
make checkdeps OUTPUT_PATH=featuretools/tests/requirement_files/latest_requirements.txt
- name: Create pull request
uses: peter-evans/create-pull-request@v3
with:
token: ${{ secrets.REPO_SCOPED_TOKEN }}
commit-message: Update latest dependencies
title: Automated Latest Dependency Updates
author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
body: "This is an auto-generated PR with **latest** dependency updates.
Please do not delete the `latest-dep-update` branch because it's needed by the auto-dependency bot."
branch: latest-dep-update
branch-suffix: short-commit-hash
base: main
assignees: machineFL
reviewers: machineAYX
================================================
FILE: .github/workflows/lint_check.yaml
================================================
name: Lint Check
on:
pull_request:
types: [opened, synchronize]
push:
branches:
- main
jobs:
lint_check:
name: ${{ matrix.python_version }} lint check
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python_version: ["3.12"]
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
- name: Set up python ${{ matrix.python_version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python_version }}
cache: 'pip'
cache-dependency-path: 'pyproject.toml'
- uses: actions/cache@v3
id: cache
with:
path: ${{ env.pythonLocation }}
key: ${{ matrix.python_version }}-lint-${{ env.pythonLocation }}-${{ hashFiles('**/pyproject.toml') }}-v01
- name: Install featuretools with optional, dev, and test requirements (not using cache)
if: steps.cache.outputs.cache-hit != 'true'
run: |
python -m pip install -e .[dev]
- name: Install featuretools with no requirements (using cache)
if: steps.cache.outputs.cache-hit == 'true'
run: |
python -m pip install -e .[dev] --no-deps
- name: Run lint test
run: make lint
================================================
FILE: .github/workflows/minimum_dependency_checker.yaml
================================================
name: Minimum Dependency Checker
on:
workflow_dispatch:
push:
branches:
- main
paths:
- 'pyproject.toml'
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
- name: Run min dep generator - test reqs
id: min_dep_gen_test
uses: alteryx/minimum-dependency-generator@v3
with:
paths: 'pyproject.toml'
options: 'dependencies'
extras_require: 'test'
output_filepath: featuretools/tests/requirement_files/minimum_test_requirements.txt
- name: Run min dep generator - core reqs
id: min_dep_gen_core
uses: alteryx/minimum-dependency-generator@v3
with:
paths: 'pyproject.toml'
options: 'dependencies'
output_filepath: featuretools/tests/requirement_files/minimum_core_requirements.txt
- name: Run min dep generator - dask
id: min_dep_gen_dask
uses: alteryx/minimum-dependency-generator@v3
with:
paths: 'pyproject.toml'
options: 'dependencies'
extras_require: 'dask'
output_filepath: featuretools/tests/requirement_files/minimum_dask_requirements.txt
- name: Create Pull Request
uses: peter-evans/create-pull-request@v3
with:
token: ${{ secrets.REPO_SCOPED_TOKEN }}
commit-message: Update minimum dependencies
title: Automated Minimum Dependency Updates
author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
body: "This is an auto-generated PR with **minimum** dependency updates.
Please do not delete the `min-dep-update` branch because it's needed by the auto-dependency bot."
branch: min-dep-update
branch-suffix: short-commit-hash
base: main
assignees: machineFL
reviewers: machineAYX
================================================
FILE: .github/workflows/performance-check.yaml
================================================
name: performance-check
on:
push:
branches:
- main
workflow_dispatch:
jobs:
run-performance-analysis:
runs-on: ubuntu-latest
steps:
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ secrets.AWS_REGION }}
- name: Run Lambda
env:
lambda_function: ${{ secrets.LAMBDA_FUNC }}
run: |
echo "{\"TestCommit\": \"$GITHUB_SHA\", \"Flags\": \"--upload-slack\"}" | base64 > payload.b64
aws lambda invoke --function-name $lambda_function --payload file://payload.b64 --invocation-type Event /dev/stdout 1>/dev/null
================================================
FILE: .github/workflows/pull_request_check.yaml
================================================
name: Pull Request Check
on:
pull_request:
types: [opened, edited, reopened, synchronize]
jobs:
pull_request_check:
name: pull request check
runs-on: ubuntu-latest
steps:
- uses: nearform-actions/github-action-check-linked-issues@v1.4.5
id: check-linked-issues
with:
exclude-branches: "release_v**, backport_v**, main, latest-dep-update-**, min-dep-update-**, dependabot/**"
github-token: ${{ secrets.REPO_SCOPED_TOKEN }}
================================================
FILE: .github/workflows/release.yaml
================================================
on:
release:
types: [published]
name: Release
jobs:
pypi-publish:
name: PyPI Release
runs-on: ubuntu-latest
permissions:
id-token: write
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- name: Install deps
run: |
python -m pip install --quiet --upgrade pip
python -m pip install --quiet --upgrade build
python -m pip install --quiet --upgrade setuptools
- name: Remove build artifacts and docs
run: |
rm -rf .eggs/ dist/ build/ docs/
- name: Build distribution
run: python -m build
- name: Publish package distributions to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
- name: Run workflow to create feedstock pull request
run: |
gh workflow run create_feedstock_pr.yaml --repo "alteryx/featuretools" -f version=${{ github.event.release.tag_name }}
env:
GITHUB_TOKEN: ${{ secrets.REPO_SCOPED_TOKEN }}
================================================
FILE: .github/workflows/release_notes_updated.yaml
================================================
name: Release Notes Updated
on:
pull_request:
types: [opened, synchronize]
jobs:
release_notes_updated:
name: release notes updated
runs-on: ubuntu-latest
steps:
- name: Check for development branch
id: branch
shell: python
env:
REF: ${{ github.event.pull_request.head.ref }}
run: |
import os
import re

# Branch-name patterns that are exempt from the release-notes check.
# Raw strings keep `\d` and `\.` as regex escapes rather than invalid Python
# string escapes (which raise a SyntaxWarning on Python 3.12+).
main = r'^main$'
release = r'^release_v\d+\.\d+\.\d+$'
backport = r'^backport_v\d+\.\d+\.\d+$'
dep_update = r'^latest-dep-update-[a-f0-9]{7}$'
min_dep_update = r'^min-dep-update-[a-f0-9]{7}$'
regex = main, release, backport, dep_update, min_dep_update
# `re.compile` is used explicitly so the builtin `compile` is not shadowed.
patterns = list(map(re.compile, regex))

# REF holds the branch name to classify; it is read from the environment.
ref = os.environ["REF"]
# A branch matching none of the exempt patterns counts as a development branch.
is_dev = not any(pattern.match(ref) for pattern in patterns)

# Publish the `is_dev` step output through the GITHUB_OUTPUT file; the
# `::set-output` workflow command is deprecated. The value is lower-cased so
# it is exactly 'true' or 'false'.
with open(os.environ["GITHUB_OUTPUT"], "a") as output_file:
    output_file.write("is_dev=" + str(is_dev).lower() + "\n")
- if: ${{ steps.branch.outputs.is_dev == 'true' }}
name: Checkout repository
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
- if: ${{ steps.branch.outputs.is_dev == 'true' }}
name: Check if release notes were updated
run: cat docs/source/release_notes.rst | grep ":pr:\`${{ github.event.number }}\`"
================================================
FILE: .github/workflows/test_without_test_dependencies.yaml
================================================
name: Test without Test Dependencies
on:
pull_request:
types: [opened, synchronize]
push:
branches:
- main
workflow_dispatch:
jobs:
use_featuretools_without_test_dependencies:
name: Test featuretools without Test Dependencies
runs-on: ubuntu-latest
strategy:
fail-fast: false
steps:
- name: Set up python 3.10
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Checkout repository
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
- name: Build featuretools and install
run: |
make package
python -m pip install unpacked_sdist/
- name: Run simple featuretools usage
run: |
# Smoke test: run a minimal DFS call on the mock customer data and call one
# primitive directly, using only what the installed package provides.
import featuretools as ft

mock_customer_es = ft.demo.load_mock_customer(return_entityset=True)
dfs_options = dict(
    entityset=mock_customer_es,
    target_dataframe_name="customers",
    agg_primitives=["count"],
    trans_primitives=["month"],
    max_depth=1,
)
ft.dfs(**dfs_options)

from featuretools.primitives import IsFreeEmailDomain

# Instantiate the primitive and call it on two sample addresses; the result
# is discarded because only successful execution matters here.
free_email_check = IsFreeEmailDomain()
sample_addresses = ['name@gmail.com', 'name@featuretools.com']
free_email_check(sample_addresses).tolist()
shell: python
================================================
FILE: .github/workflows/tests_with_latest_deps.yaml
================================================
name: Tests
on:
pull_request:
types: [opened, synchronize]
push:
branches:
- main
workflow_dispatch:
jobs:
# Linux unit tests, run once per Python version in the matrix.
tests:
name: ${{ matrix.python_version }} unit tests
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python_version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python_version }}
- name: Checkout repository
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
# `make package` is run here; the following steps install and test from unpacked_sdist/.
- name: Build featuretools package
run: make package
- name: Set up pip and graphviz
run: |
pip config --site set global.progress_bar off
python -m pip install --upgrade pip
sudo apt update && sudo apt install -y graphviz
- name: Install featuretools with test requirements
run: |
python -m pip install -e unpacked_sdist/
python -m pip install -e unpacked_sdist/[test,dask]
# Coverage is enabled for a single matrix entry: this step exports
# `coverage_args` into the job environment and the later
# `if: env.coverage_args` steps key off that variable.
# NOTE(review): this compares the string matrix value with the number 3.9 and
# so relies on expression type coercion -- consider comparing against '3.9'.
- if: ${{ matrix.python_version == 3.9 }}
name: Generate coverage args
run: echo "coverage_args=--cov=featuretools --cov-config=../pyproject.toml --cov-report=xml:../coverage.xml" >> $GITHUB_ENV
- if: ${{ env.coverage_args }}
name: Erase coverage files
run: |
cd unpacked_sdist
coverage erase
# `coverage_args` expands to nothing when the variable was not set above.
- name: Run unit tests
run: |
cd unpacked_sdist
pytest featuretools/ -n auto ${{ env.coverage_args }}
# The report path matches `--cov-report=xml:../coverage.xml`, i.e. the workspace root.
- if: ${{ env.coverage_args }}
name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
fail_ci_if_error: true
files: ${{ github.workspace }}/coverage.xml
verbose: true
# Windows unit tests, run once per Python version in the matrix inside a
# Miniconda environment named `featuretools`.
win_unit_tests:
name: ${{ matrix.python_version }} windows unit tests
runs-on: windows-latest
strategy:
fail-fast: false
matrix:
python_version: ["3.9", "3.10", "3.11", "3.12"]
steps:
# Downloads the installer and aborts if its SHA256 differs from the expected value.
# NOTE(review): the URL points at the "latest" installer while the expected hash
# is a fixed literal; this presumably breaks whenever a new installer is
# published -- confirm, and consider pinning a versioned installer file.
- name: Download miniconda
shell: pwsh
run: |
$File = "Miniconda3-latest-Windows-x86_64.exe"
$Uri = "https://repo.anaconda.com/miniconda/$File"
$ProgressPreference = "silentlyContinue"
Invoke-WebRequest -Uri $Uri -Outfile "$env:USERPROFILE/$File"
$hashFromFile = Get-FileHash "$env:USERPROFILE/$File" -Algorithm SHA256
$hashFromUrl = "f4d6147b40ea6822255c2dcec8bb0d357c09e230976213f70d7b8c4a10d86bb0"
if ($hashFromFile.Hash -ne "$hashFromUrl") {
Throw "$File hashes do not match"
}
# Silent (/S), per-user install into %UserProfile%\Miniconda3.
- name: Install miniconda
shell: cmd
run: start /wait "" %UserProfile%\Miniconda3-latest-Windows-x86_64.exe /InstallationType=JustMe /RegisterPython=0 /S /D=%UserProfile%\Miniconda3
# Each pwsh step dot-sources conda-hook.ps1 before using `conda`.
- name: Create python ${{ matrix.python_version }} environment
shell: pwsh
run: |
. $env:USERPROFILE\Miniconda3\shell\condabin\conda-hook.ps1
conda create -n featuretools python=${{ matrix.python_version }}
- name: Checkout repository
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
# graphviz comes from conda-forge; featuretools itself is installed with pip
# from the checkout together with the `test` and `dask` extras.
- name: Install featuretools with test requirements
shell: pwsh
run: |
. $env:USERPROFILE\Miniconda3\shell\condabin\conda-hook.ps1
conda activate featuretools
conda config --add channels conda-forge
conda install -q -y -c conda-forge python-graphviz graphviz
python -m pip install --upgrade pip
python -m pip install .[test,dask]
# No `shell:` is set on this step, so the runner's default shell is used.
- name: Run unit tests
run: |
. $env:USERPROFILE\Miniconda3\shell\condabin\conda-hook.ps1
conda activate featuretools
pytest featuretools\ -n auto
================================================
FILE: .github/workflows/tests_with_minimum_deps.yaml
================================================
name: Tests - Minimum Dependencies
on:
pull_request:
types: [opened, synchronize]
push:
branches:
- main
workflow_dispatch:
jobs:
# Runs the unit tests against the minimum supported dependency versions, which
# are installed from the pinned requirement files under
# featuretools/tests/requirement_files/.
py39_tests_minimum_dependencies:
name: Tests - 3.9 Minimum Dependencies
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python_version: ["3.9"]
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
# Take the interpreter version from the matrix rather than a hard-coded,
# unquoted YAML number: the matrix entry is a quoted string, so a value such
# as "3.10" can never be parsed as the float 3.1.
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python_version }}
- name: Config pip, upgrade pip, and install graphviz
run: |
sudo apt update
sudo apt install -y graphviz
pip config --site set global.progress_bar off
python -m pip install --upgrade pip
python -m pip install wheel
# Install the package without its dependencies so that only the pinned
# minimum versions from the requirement files below are used.
- name: Install featuretools with no dependencies
run: |
python -m pip install -e . --no-dependencies
- name: Install featuretools - minimum tests dependencies
run: |
python -m pip install -r featuretools/tests/requirement_files/minimum_test_requirements.txt
- name: Install featuretools - minimum core dependencies
run: |
python -m pip install -r featuretools/tests/requirement_files/minimum_core_requirements.txt
- name: Install featuretools - minimum Dask dependencies
run: |
python -m pip install -r featuretools/tests/requirement_files/minimum_dask_requirements.txt
# `-x` stops the run at the first failing test.
- name: Run unit tests without code coverage
run: python -m pytest -x -n auto featuretools/tests/
================================================
FILE: .github/workflows/tests_with_woodwork_main_branch.yaml
================================================
name: Tests - Featuretools with Woodwork main branch
on:
workflow_dispatch:
jobs:
# Runs the unit tests with Woodwork installed from its main branch, once per
# Python version in the matrix. Only runs when the repository owner is alteryx.
tests_woodwork_main:
if: ${{ github.repository_owner == 'alteryx' }}
# The matrix defines only `python_version`, so the job name uses that key alone.
name: ${{ matrix.python_version }} tests
runs-on: ubuntu-latest
strategy:
fail-fast: true
matrix:
python_version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python_version }}
- name: Checkout repository
uses: actions/checkout@v3
- name: Build featuretools package
run: make package
- name: Set up pip and graphviz
run: |
pip config --site set global.progress_bar off
python -m pip install -U pip
sudo apt update && sudo apt install -y graphviz
# Install featuretools with its test extras first, then replace the released
# Woodwork with an archive of the Woodwork main branch.
- name: Install Woodwork & Featuretools - test requirements
run: |
python -m pip install -e unpacked_sdist/[test,dask]
python -m pip uninstall -y woodwork
python -m pip install https://github.com/alteryx/woodwork/archive/main.zip
- name: Log test run info
run: |
echo "Run unit tests without code coverage for ${{ matrix.python_version }}"
echo "Testing with woodwork version:" `python -c "import woodwork; print(woodwork.__version__)"`
- name: Run unit tests without code coverage
run: pytest featuretools/ -n auto
# Follow-up job that posts the run URL to Slack when the test job did not succeed.
# `always()` makes the job run whatever the outcome of the job it needs.
slack_alert_failure:
name: Send Slack alert if failure
needs: tests_woodwork_main
runs-on: ubuntu-latest
if: ${{ always() }}
steps:
# The condition is "not success", so it is also true for results other than failure.
# NOTE(review): that presumably includes a test job skipped by its
# repository-owner guard -- confirm whether an alert is wanted in that case.
- name: Send Slack alert if failure
if: ${{ needs.tests_woodwork_main.result != 'success' }}
id: slack
uses: slackapi/slack-github-action@v1
with:
payload: |
{
"url": "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
}
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
================================================
FILE: .gitignore
================================================
#
docs/source/generated/
docs/source/getting_started/graphs
venv/
data/
installed/
output.csv
htmlcov/
.idea/
featuretools/tests/integration_data/*.csv
featuretools/tests/integration_data/*.gzip
featuretools/tests/integration_data/customers.gzip
featuretools/tests/integration_data/log-0.gzip
featuretools/tests/integration_data/log-1.gzip
featuretools/tests/integration_data/log.gzip
featuretools/tests/integration_data/products.gzip
featuretools/tests/integration_data/regions.gzip
featuretools/tests/integration_data/sessions.gzip
featuretools/tests/integration_data/stores.gzip
**/dask-worker-space/*
*.dirlock
*.~lock*
unpacked_sdist/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
**/.DS_Store
.DS_Store
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv
venv/
ENV/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# pickle files
*.p
*.pickle
.pytest_cache
#IDE
.vscode
.devcontainer
*.stats
Dockerfile.arm
.dockerignore
================================================
FILE: .pre-commit-config.yaml
================================================
# Files skipped by every hook. The pattern is a verbose-mode regular expression.
# The dots are escaped so that they match a literal "." (a file extension or the
# leading dot of `.github/`) rather than any character.
exclude: |
(?x)
\.html$|\.csv$|\.svg$|\.md$|\.txt$|\.json$|\.xml$|\.pickle$|^\.github/|
(LICENSE.*|README.*)
repos:
- repo: https://github.com/kynan/nbstripout
rev: 0.5.0
hooks:
- id: nbstripout
entry: nbstripout
language: python
types: [jupyter]
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/MarcoGorelli/absolufy-imports
rev: v0.3.1
hooks:
- id: absolufy-imports
files: ^featuretools/
- repo: https://github.com/asottile/add-trailing-comma
rev: v2.2.3
hooks:
- id: add-trailing-comma
name: Add trailing comma
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: 'v0.3.3'
hooks:
- id: ruff
types_or: [ python, pyi, jupyter ]
args:
- --fix
- --config=./pyproject.toml
- id: ruff-format
types_or: [ python, pyi, jupyter ]
args:
- --config=./pyproject.toml
================================================
FILE: .readthedocs.yaml
================================================
# .readthedocs.yaml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
# Required
version: 2
# Build documentation in the docs/ directory with Sphinx
sphinx:
configuration: docs/source/conf.py
# Optionally build your docs in additional formats such as PDF and ePub
formats: []
build:
os: "ubuntu-22.04"
tools:
python: "3.9"
apt_packages:
- graphviz
- openjdk-11-jre-headless
jobs:
post_build:
- export JAVA_HOME="/usr/lib/jvm/java-11-openjdk-amd64"
python:
install:
- method: pip
path: .
extra_requirements:
- docs
================================================
FILE: LICENSE
================================================
BSD 3-Clause License
Copyright (c) 2017, Feature Labs, Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: Makefile
================================================
# Delete compiled Python files, __pycache__ entries, editor backup files, and
# per-process coverage data files anywhere under the current directory.
.PHONY: clean
clean:
find . -name '*.pyo' -delete
find . -name '*.pyc' -delete
find . -name __pycache__ -delete
find . -name '*~' -delete
find . -name '.coverage.*' -delete
# Check-only linting: run the notebook standardizer's check-execution command
# and ruff's lint and format checks. Nothing is modified.
.PHONY: lint
lint:
python docs/notebook_version_standardizer.py check-execution
ruff check . --config=./pyproject.toml
ruff format . --check --config=./pyproject.toml
# Same tools as `lint`, but applying fixes: standardize the notebooks, let ruff
# auto-fix lint findings, and reformat the code.
.PHONY: lint-fix
lint-fix:
python docs/notebook_version_standardizer.py standardize
ruff check . --fix --config=./pyproject.toml
ruff format . --config=./pyproject.toml
# Run the test suite with `-n auto`.
.PHONY: test
test:
python -m pytest featuretools/ -n auto
# Run the test suite while collecting coverage for the featuretools package.
.PHONY: testcoverage
testcoverage:
python -m pytest featuretools/ --cov=featuretools -n auto
# Editable install of the package (upgrades pip first).
.PHONY: installdeps
installdeps: upgradepip
pip install -e .
# Editable install with the `dev` extra, then install the pre-commit git hooks.
.PHONY: installdeps-dev
installdeps-dev: upgradepip
pip install -e ".[dev]"
pre-commit install
# Editable install with the `test` extra.
.PHONY: installdeps-test
installdeps-test: upgradepip
pip install -e ".[test]"
# Write the `pip freeze` lines for the allow-listed packages to $(OUTPUT_PATH),
# leaving out any line that refers to the featuretools git repository.
# OUTPUT_PATH is not set in this target and must be supplied by the caller.
.PHONY: checkdeps
checkdeps:
$(eval allow_list='holidays|scipy|numpy|pandas|tqdm|cloudpickle|distributed|dask|psutil|woodwork')
pip freeze | grep -v "alteryx/featuretools.git" | grep -E $(allow_list) > $(OUTPUT_PATH)
# Upgrade pip.
.PHONY: upgradepip
upgradepip:
python -m pip install --upgrade pip
# Upgrade the `build` package.
.PHONY: upgradebuild
upgradebuild:
python -m pip install --upgrade build
# Upgrade setuptools.
.PHONY: upgradesetuptools
upgradesetuptools:
python -m pip install --upgrade setuptools
# Run `python -m build`, read the package version via setuptools' `--version`,
# extract dist/featuretools-<version>.tar.gz, and rename the extracted
# directory to unpacked_sdist.
.PHONY: package
package: upgradepip upgradebuild upgradesetuptools
python -m build
$(eval PACKAGE=$(shell python -c 'import setuptools; setuptools.setup()' --version))
tar -zxvf "dist/featuretools-${PACKAGE}.tar.gz"
mv "featuretools-${PACKAGE}" unpacked_sdist
================================================
FILE: README.md
================================================
<p align="center">
<img width=50% src="https://www.featuretools.com/wp-content/uploads/2017/12/FeatureLabs-Logo-Tangerine-800.png" alt="Featuretools" />
</p>
<p align="center">
<i>"One of the holy grails of machine learning is to automate more and more of the feature engineering process."</i> ― Pedro Domingos, <a href="https://bit.ly/things_to_know_ml">A Few Useful Things to Know about Machine Learning</a>
</p>
<p align="center">
<a href="https://github.com/alteryx/featuretools/actions/workflows/tests_with_latest_deps.yaml" alt="Tests" target="_blank">
<img src="https://github.com/alteryx/featuretools/actions/workflows/tests_with_latest_deps.yaml/badge.svg?branch=main" alt="Tests" />
</a>
<a href="https://codecov.io/gh/alteryx/featuretools">
<img src="https://codecov.io/gh/alteryx/featuretools/branch/main/graph/badge.svg"/>
</a>
<a href='https://featuretools.alteryx.com/en/stable/?badge=stable'>
<img src='https://readthedocs.com/projects/feature-labs-inc-featuretools/badge/?version=stable' alt='Documentation Status' />
</a>
<a href="https://badge.fury.io/py/featuretools" target="_blank">
<img src="https://badge.fury.io/py/featuretools.svg?maxAge=2592000" alt="PyPI Version" />
</a>
<a href="https://anaconda.org/conda-forge/featuretools" target="_blank">
<img src="https://anaconda.org/conda-forge/featuretools/badges/version.svg" alt="Anaconda Version" />
</a>
<a href="https://stackoverflow.com/questions/tagged/featuretools" target="_blank">
<img src="http://img.shields.io/badge/questions-on_stackoverflow-blue.svg" alt="StackOverflow" />
</a>
<a href="https://pepy.tech/project/featuretools" target="_blank">
<img src="https://static.pepy.tech/badge/featuretools/month" alt="PyPI Downloads" />
</a>
</p>
<hr>
[Featuretools](https://www.featuretools.com) is a Python library for automated feature engineering. See the [documentation](https://docs.featuretools.com) for more information.
## Installation
Install with pip
```
python -m pip install featuretools
```
or from the Conda-forge channel on [conda](https://anaconda.org/conda-forge/featuretools):
```
conda install -c conda-forge featuretools
```
### Add-ons
You can install add-ons individually or all at once by running:
```
python -m pip install "featuretools[complete]"
```
**Premium Primitives** - Use Premium Primitives from the premium-primitives repo
```
python -m pip install "featuretools[premium]"
```
**NLP Primitives** - Use Natural Language Primitives from the nlp-primitives repo
```
python -m pip install "featuretools[nlp]"
```
**Dask Support** - Use Dask to run DFS with njobs > 1
```
python -m pip install "featuretools[dask]"
```
## Example
Below is an example of using Deep Feature Synthesis (DFS) to perform automated feature engineering. In this example, we apply DFS to a multi-table dataset consisting of timestamped customer transactions.
```python
>> import featuretools as ft
>> es = ft.demo.load_mock_customer(return_entityset=True)
>> es.plot()
```
<img src="https://github.com/alteryx/featuretools/blob/main/docs/source/_static/images/entity_set.png?raw=true" width="350">
Featuretools can automatically create a single table of features for any "target dataframe"
```python
>> feature_matrix, features_defs = ft.dfs(entityset=es, target_dataframe_name="customers")
>> feature_matrix.head(5)
```
```
zip_code COUNT(transactions) COUNT(sessions) SUM(transactions.amount) MODE(sessions.device) MIN(transactions.amount) MAX(transactions.amount) YEAR(join_date) SKEW(transactions.amount) DAY(join_date) ... SUM(sessions.MIN(transactions.amount)) MAX(sessions.SKEW(transactions.amount)) MAX(sessions.MIN(transactions.amount)) SUM(sessions.MEAN(transactions.amount)) STD(sessions.SUM(transactions.amount)) STD(sessions.MEAN(transactions.amount)) SKEW(sessions.MEAN(transactions.amount)) STD(sessions.MAX(transactions.amount)) NUM_UNIQUE(sessions.DAY(session_start)) MIN(sessions.SKEW(transactions.amount))
customer_id ...
1 60091 131 10 10236.77 desktop 5.60 149.95 2008 0.070041 1 ... 169.77 0.610052 41.95 791.976505 175.939423 9.299023 -0.377150 5.857976 1 -0.395358
2 02139 122 8 9118.81 mobile 5.81 149.15 2008 0.028647 20 ... 114.85 0.492531 42.96 596.243506 230.333502 10.925037 0.962350 7.420480 1 -0.470007
3 02139 78 5 5758.24 desktop 6.78 147.73 2008 0.070814 10 ... 64.98 0.645728 21.77 369.770121 471.048551 9.819148 -0.244976 12.537259 1 -0.630425
4 60091 111 8 8205.28 desktop 5.73 149.56 2008 0.087986 30 ... 83.53 0.516262 17.27 584.673126 322.883448 13.065436 -0.548969 12.738488 1 -0.497169
5 02139 58 4 4571.37 tablet 5.91 148.17 2008 0.085883 19 ... 73.09 0.830112 27.46 313.448942 198.522508 8.950528 0.098885 5.599228 1 -0.396571
[5 rows x 69 columns]
```
We now have a feature vector for each customer that can be used for machine learning. See the [documentation on Deep Feature Synthesis](https://featuretools.alteryx.com/en/stable/getting_started/afe.html) for more examples.
Featuretools contains many different types of built-in primitives for creating features. If the primitive you need is not included, Featuretools also allows you to [define your own custom primitives](https://featuretools.alteryx.com/en/stable/getting_started/primitives.html#defining-custom-primitives).
## Demos
**Predict Next Purchase**
[Repository](https://github.com/alteryx/open_source_demos/blob/main/predict-next-purchase/) | [Notebook](https://github.com/alteryx/open_source_demos/blob/main/predict-next-purchase/Tutorial.ipynb)
In this demonstration, we use a multi-table dataset of 3 million online grocery orders from Instacart to predict what a customer will buy next. We show how to generate features with automated feature engineering and build an accurate machine learning pipeline using Featuretools, which can be reused for multiple prediction problems. For more advanced users, we show how to scale that pipeline to a large dataset using Dask.
For more examples of how to use Featuretools, check out our [demos](https://www.featuretools.com/demos) page.
## Testing & Development
The Featuretools community welcomes pull requests. Instructions for testing and development are available [here.](https://featuretools.alteryx.com/en/stable/install.html#development)
## Support
The Featuretools community is happy to provide support to users of Featuretools. Project support can be found in four places depending on the type of question:
1. For usage questions, use [Stack Overflow](https://stackoverflow.com/questions/tagged/featuretools) with the `featuretools` tag.
2. For bugs, issues, or feature requests, start a [GitHub issue](https://github.com/alteryx/featuretools/issues).
3. For discussion regarding development on the core library, use [Slack](https://join.slack.com/t/alteryx-oss/shared_invite/zt-182tyvuxv-NzIn6eiCEf8TBziuKp0bNA).
4. For everything else, the core developers can be reached by email at open_source_support@alteryx.com
## Citing Featuretools
If you use Featuretools, please consider citing the following paper:
James Max Kanter, Kalyan Veeramachaneni. [Deep feature synthesis: Towards automating data science endeavors.](https://dai.lids.mit.edu/wp-content/uploads/2017/10/DSAA_DSM_2015.pdf) *IEEE DSAA 2015*.
BibTeX entry:
```bibtex
@inproceedings{kanter2015deep,
author = {James Max Kanter and Kalyan Veeramachaneni},
title = {Deep feature synthesis: Towards automating data science endeavors},
booktitle = {2015 {IEEE} International Conference on Data Science and Advanced Analytics, DSAA 2015, Paris, France, October 19-21, 2015},
pages = {1--10},
year = {2015},
organization={IEEE}
}
```
## Built at Alteryx
**Featuretools** is an open source project maintained by [Alteryx](https://www.alteryx.com). To see the other open source projects we’re working on visit [Alteryx Open Source](https://www.alteryx.com/open-source). If building impactful data science pipelines is important to you or your business, please get in touch.
<p align="center">
<a href="https://www.alteryx.com/open-source">
<img src="https://alteryx-oss-web-images.s3.amazonaws.com/OpenSource_Logo-01.png" alt="Alteryx Open Source" width="800"/>
</a>
</p>
================================================
FILE: contributing.md
================================================
# Contributing to Featuretools
:+1::tada: First off, thank you for taking the time to contribute! :tada::+1:
Whether you are a novice or experienced software developer, all contributions and suggestions are welcome!
There are many ways to contribute to Featuretools, with the most common ones being contribution of code or documentation to the project.
**To contribute, you can:**
1. Help users on our [Slack channel](https://join.slack.com/t/alteryx-oss/shared_invite/zt-182tyvuxv-NzIn6eiCEf8TBziuKp0bNA). Answer questions under the featuretools tag on [Stack Overflow](https://stackoverflow.com/questions/tagged/featuretools)
2. Submit a pull request for one of our [Good First Issues](https://github.com/alteryx/featuretools/issues?q=is%3Aopen+is%3Aissue+label%3A%22Good+First+Issue%22)
3. Make changes to the codebase, see [Contributing to the codebase](#Contributing-to-the-Codebase).
4. Improve our documentation, which can be found under the [docs](docs/) directory or at https://docs.featuretools.com
5. [Report issues](#Report-issues) you're facing, and give a "thumbs up" on issues that others reported and that are relevant to you. Issues should be used for bugs and feature requests only.
6. Spread the word: reference Featuretools from your blog and articles, link to it from your website, or simply star it in GitHub to say "I use it".
* If you would like to be featured on our [ecosystem page](https://featuretools.alteryx.com/en/stable/resources/ecosystem.html), you can submit a [pull request](https://github.com/alteryx/featuretools).
## Contributing to the Codebase
Before starting major work, you should touch base with the maintainers of Featuretools by filing an issue on GitHub or posting a message in the [#development channel on Slack](https://join.slack.com/t/alteryx-oss/shared_invite/zt-182tyvuxv-NzIn6eiCEf8TBziuKp0bNA). This will increase the likelihood your pull request will eventually get merged in.
#### 1. Fork and clone repo
* The code is hosted on GitHub, so you will need to use Git to fork the project and make changes to the codebase. To start, go to the [Featuretools GitHub page](https://github.com/alteryx/featuretools) and click the `Fork` button.
* After you have created the fork, you will want to clone the fork to your machine and connect your version of the project to the upstream Featuretools repo.
```bash
git clone https://github.com/your-user-name/featuretools.git
cd featuretools
git remote add upstream https://github.com/alteryx/featuretools
```
* Once you have obtained a copy of the code, you should create a development environment that is separate from your existing Python environment so that you can make and test changes without compromising your own work environment. You can run the following steps to create a separate virtual environment, and install Featuretools in editable mode.
```bash
python -m venv venv
source venv/bin/activate
make installdeps
git checkout -b issue####-branch_name
```
* You will need to install GraphViz and Pandoc to run all unit tests & build docs:
> Pandoc is only needed to build the documentation locally.
**macOS (Intel)** (use [Homebrew](https://brew.sh/)):
```console
brew install graphviz pandoc
```
**macOS (M1)** (use [Homebrew](https://brew.sh/)):
```console
brew install graphviz pandoc
```
**Ubuntu**:
```console
sudo apt install graphviz pandoc -y
```
#### 2. Implement your Pull Request
* Implement your pull request. If needed, add new tests or update the documentation.
* Before submitting to GitHub, verify the tests run and the code lints properly
```bash
# runs linting
make lint
# will fix some common linting issues automatically
make lint-fix
# runs test
make test
```
* If you made changes to the documentation, build the documentation locally.
```bash
# go to docs and build
cd docs
make html
# view docs locally
open build/html/index.html
```
* Before you commit, a few lint fixing hooks will run. You can also manually run these.
```bash
# run linting hooks only on changed files
pre-commit run
# run linting hooks on all files
pre-commit run --all-files
```
#### 3. Submit your Pull Request
* Once your changes are ready to be submitted, make sure to push your changes to GitHub before creating a pull request.
* If you need to update your code with the latest changes from the main Featuretools repo, you can do that by running the commands below, which will merge the latest changes from the Featuretools `main` branch into your current local branch. You may need to resolve merge conflicts if there are conflicts between your changes and the upstream changes. After the merge, you will need to push the updates to your forked repo after running these commands.
```bash
git fetch upstream
git merge upstream/main
```
* Create a pull request to merge the changes from your forked repo branch into the Featuretools `main` branch. Creating the pull request will automatically run our continuous integration.
* If this is your first contribution, you will need to sign the Contributor License Agreement as directed.
* Update the "Future Release" section of the release notes (`docs/source/release_notes.rst`) to include your pull request and add your GitHub username to the list of contributors. Add a description of your PR to the subsection that most closely matches your contribution:
* Enhancements: new features or additions to Featuretools.
* Fixes: things like bugfixes or adding more descriptive error messages.
* Changes: modifications to an existing part of Featuretools.
* Documentation Changes
* Testing Changes
Documentation or testing changes rarely warrant an individual release notes entry; the PR number can be added to their respective "Miscellaneous changes" entries.
* We will review your changes, and you will most likely be asked to make additional changes before your pull request is finally ready to merge. However, once it has been reviewed by a maintainer of Featuretools and passes continuous integration, we will merge it, and you will have successfully contributed to Featuretools!
## Report issues
When reporting issues, please include as much detail as possible about your operating system, Featuretools version, and Python version. Whenever possible, please also include a brief, self-contained code example that demonstrates the problem.
================================================
FILE: docs/Makefile
================================================
# Makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = build
GENDIR = source/generated
# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/)
endif
# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
.PHONY: help
help:
@echo "Please use \`make <target>' where <target> is one of"
@echo " html to make standalone HTML files"
@echo " dirhtml to make HTML files named index.html in directories"
@echo " singlehtml to make a single large HTML file"
@echo " pickle to make pickle files"
@echo " json to make JSON files"
@echo " htmlhelp to make HTML files and a HTML help project"
@echo " qthelp to make HTML files and a qthelp project"
@echo " applehelp to make an Apple Help Book"
@echo " devhelp to make HTML files and a Devhelp project"
@echo " epub to make an epub"
@echo " epub3 to make an epub3"
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
@echo " latexpdf to make LaTeX files and run them through pdflatex"
@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
@echo " text to make text files"
@echo " man to make manual pages"
@echo " texinfo to make Texinfo files"
@echo " info to make Texinfo files and run them through makeinfo"
@echo " gettext to make PO message catalogs"
@echo " changes to make an overview of all changed/added/deprecated items"
@echo " xml to make Docutils-native XML files"
@echo " pseudoxml to make pseudoxml-XML files for display purposes"
@echo " linkcheck to check all external links for integrity"
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
@echo " coverage to run coverage check of the documentation (if enabled)"
@echo " dummy to check syntax errors of document sources"
.PHONY: clean
clean:
rm -rf $(BUILDDIR)/*
rm -rf $(GENDIR)/*
.PHONY: html
# Build the standalone HTML documentation into $(BUILDDIR)/html.
# $(SPHINXOPTS) is already part of $(ALLSPHINXOPTS); it is not repeated here
# so that counted flags (e.g. -v) are not applied twice.
html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
.PHONY: dirhtml
dirhtml:
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
.PHONY: singlehtml
singlehtml:
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
@echo
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
.PHONY: pickle
pickle:
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
@echo
@echo "Build finished; now you can process the pickle files."
.PHONY: json
json:
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
@echo
@echo "Build finished; now you can process the JSON files."
.PHONY: htmlhelp
htmlhelp:
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
@echo
@echo "Build finished; now you can run HTML Help Workshop with the" \
".hhp project file in $(BUILDDIR)/htmlhelp."
.PHONY: qthelp
# Build the Qt help project into $(BUILDDIR)/qthelp and print follow-up steps.
# The inner double quotes are escaped so the shell prints them instead of
# treating them as string delimiters.
qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run \"qcollectiongenerator\" with the" \
	".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/featuretools.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/featuretools.qhc"
.PHONY: applehelp
applehelp:
$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
@echo
@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
@echo "N.B. You won't be able to view it unless you put it in" \
"~/Library/Documentation/Help or install it in your application" \
"bundle."
.PHONY: devhelp
devhelp:
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
@echo
@echo "Build finished."
@echo "To view the help file:"
@echo "# mkdir -p $$HOME/.local/share/devhelp/featuretools"
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/featuretools"
@echo "# devhelp"
.PHONY: epub
epub:
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
@echo
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
.PHONY: epub3
epub3:
$(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3
@echo
@echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3."
.PHONY: latex
latex:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
@echo "Run \`make' in that directory to run these through (pdf)latex" \
"(use \`make latexpdf' here to do that automatically)."
.PHONY: latexpdf
latexpdf:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through pdflatex..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
.PHONY: latexpdfja
latexpdfja:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through platex and dvipdfmx..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
.PHONY: text
text:
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
@echo
@echo "Build finished. The text files are in $(BUILDDIR)/text."
.PHONY: man
man:
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
@echo
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
.PHONY: texinfo
texinfo:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
@echo "Run \`make' in that directory to run these through makeinfo" \
"(use \`make info' here to do that automatically)."
.PHONY: info
# Build the Texinfo sources, then run makeinfo through a sub-make.
# $(MAKE) is used (as in the latexpdf targets) so the sub-make inherits the
# same make program, command-line flags and jobserver as the parent.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
.PHONY: gettext
gettext:
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
@echo
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
.PHONY: changes
changes:
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
@echo
@echo "The overview file is in $(BUILDDIR)/changes."
.PHONY: linkcheck
linkcheck:
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
@echo
@echo "Link check complete; look for any errors in the above output " \
"or in $(BUILDDIR)/linkcheck/output.txt."
.PHONY: doctest
doctest:
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
@echo "Testing of doctests in the sources finished, look at the " \
"results in $(BUILDDIR)/doctest/output.txt."
.PHONY: coverage
coverage:
$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
@echo "Testing of coverage in the sources finished, look at the " \
"results in $(BUILDDIR)/coverage/python.txt."
.PHONY: xml
xml:
$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
@echo
@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
.PHONY: pseudoxml
pseudoxml:
$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
@echo
@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
.PHONY: dummy
dummy:
$(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy
@echo
@echo "Build finished. Dummy builder generates no files."
================================================
FILE: docs/backport_release.md
================================================
# Backport Release Process
In situations where we need to backport commits to earlier versions of our software, we'll need to perform the release process slightly differently than a normal release.
<p align="center">
<img width=60% src="source/_static/images/backport_release.png" alt="Backport Release" />
</p>
This document outlines the differences between a normal release and a backport release. It uses the same outline as the [Release Guide](../release.md).
## 0. Pre-Release Checklist
Before starting the backport release process, verify the following:
- Get agreement on the latest commit to use for targeting the release. A backport release will be targeted on some commit other than the latest on main. Many times the new target will be an old release, which will have a tag that can be referenced--for example `v0.11.1`.
- Get agreement on the commits to port over for the backport release.
- Get agreement on the version number to use for the backport release.
#### Version Numbering for Backport Releases
Featuretools uses [semantic versioning](https://semver.org/). Every release has a major, minor and patch version number, which are displayed like so: `<majorVersion>.<minorVersion>.<patchVersion>`. **A backport release will increment the patch version.**
This may be an intermediate number between two preexisting releases--for example a new `0.11.2` to be added between existing `0.11.1` and `0.12.0` releases. It can also be a new latest release--so `0.12.1` in the same situation--using only some of the commits that are present in the Future Release section of the release notes.
## 0.5. Create target branch for backport release
#### Checkout intended target commit
1. Checkout the agreed upon latest commit for targeting the release. If this is a previous release, you may checkout its tag with `git checkout v0.11.1`.
#### Create backport branch
1. Branch off of the target commit. For the branch name, please use the most recent major and minor versions to this commit (in this example `0` and `11` respectively), leaving the patch number as an `x`. This means that we would create `0.11.x` in the working example. This is necessary so that if any further backport releases are needed, we could continue to use this branch as the target. This branch is to be treated as `main` is treated in a normal release. It will be the target for our release.
This branch will be automatically protected (unless the version exceeds 9.Y.x or X.99.x, in which case contact the repo team about expanding the protection rules) to avoid unintended commits from making their way into the release undetected.
#### Port over desired commits
1. Create a feature branch off the backport branch. For the branch name, please use "backport_vX.Y.Z" as the naming scheme (e.g. "backport_v0.11.2"). Doing so will bypass our release notes checkin test which requires all other PRs to add a release note entry.
2. Cherry-pick the desired commits onto `backport_v0.11.2`.
3. Create a pull request with the backport `0.11.x` branch as its target, get confirmation that the desired changes were added, and confirm that the CI checks pass.
4. Under the "Future Release" section in the release notes, include the ported over commits' release notes (don't remove them from their original location back on `main`), indicating that they are a backport of the original PR.
```
Future Release
==============
* Enhancements
* Fixes
* Fix bug (backport of :pr:`1110`)
* Changes
* Documentation Changes
* Testing Changes
Thanks to the following people for contributing to this release:
```
5. Merge the PR into the `0.11.x` backport branch
## 1. Create Featuretools Backport release on Github
With our backport branch `0.11.x` as our target, we now proceed with the release of `0.11.2`.
#### Create release branch
1. **Branch off of the backport branch `0.11.x`.** For the branch name, please use "release_vX.Y.Z" as the naming scheme (e.g. "release_v0.11.2"). Doing so will bypass our release notes checkin test which requires all other PRs to add a release note entry.
#### Bump version number
1. Bump `__version__` in `setup.py`, `featuretools/version.py`, and `featuretools/tests/test_version.py`.
#### Update Release Notes
1. Replace **"Future Release"** in `docs/source/release_notes.rst` with the current date
```
v0.11.2 Sep 28, 2020
====================
```
2. Remove any unused Release Notes sections for this release (e.g. Fixes, Testing Changes)
3. Add yourself to the list of contributors to this release and **put the contributors in alphabetical order**
4. The release PR does not need to be mentioned in the list of changes
5. Add a commented out "Future Release" section with all of the Release Notes sections above the current section
```
.. Future Release
==============
* Enhancements
* Fixes
* Changes
* Documentation Changes
* Testing Changes
.. Thanks to the following people for contributing to this release:
```
#### Create Release PR
A [release pr](https://github.com/alteryx/featuretools/pull/1915) should have the version number as the title and the release notes for that release as the PR body text. The contributors list is not necessary. The special sphinx docs syntax (:pr:\`547\`) needs to be changed to github link syntax (#547).
Checklist before merging:
- All tests are currently green on checkin and on `0.11.x`.
- The ReadtheDocs build for the release PR branch has passed, and the resulting docs contain the expected release notes.
- PR has been reviewed and approved.
- Confirm with the team that `0.11.x` will be frozen until step 2 (Github Release) is complete.
## 2. Create Github Release
After the release pull request has been merged into the `0.11.x` branch, it is time to draft the GitHub release. [Example release](https://github.com/alteryx/featuretools/releases/tag/v1.6.0)
- **The target should be the `0.11.x` backport branch**
- The tag should be the version number with a v prefix (e.g. v0.11.2)
- Release title is the same as the tag
- Release description should be the full Release Notes updates for the release, including the line thanking contributors. Contributors should also have their links changed from the docs syntax (:user:\`gsheni\`) to github syntax (@gsheni)
- This is not a pre-release
- Publishing the release will automatically upload the package to PyPI
Note that this backported release will show up on the repository's front page as the latest release even if there is technically a later `0.12.0` release.
## Release on conda-forge
If a later release exists, conda-forge will not automatically create a new PR in [conda-forge/featuretools-feedstock](https://github.com/conda-forge/featuretools-feedstock/pulls). Instead a PR will need to be manually created. You can do either of the following:
- Branch off of the 0.11.1 meta.yaml update commit for the 0.11.2 meta.yaml changes. This is "cleaner" and sometimes easier, but if migration files (like py310) have been added between 0.11.1 and 0.12.0 you will have to add them in and re-render yourself.
- Tack the 0.11.2 changes on after the 0.12.0 update commit in the feedstock repo. This means that if any of the boilerplate has changed, you do not have to manually re-add it yourself. An example of this can be seen from a Woodwork backport release [here](https://github.com/conda-forge/woodwork-feedstock/pull/32).
Once the PR is created:
1. Update requirements changes in `recipe/meta.yaml` - you may need to handle the version, source links, and SHA256 if you had to open the PR yourself. You will also need to update the requirements.
2. After tests pass, a maintainer will merge the PR in
================================================
FILE: docs/make.bat
================================================
@ECHO OFF
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set BUILDDIR=build
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source
set I18NSPHINXOPTS=%SPHINXOPTS% source
if NOT "%PAPER%" == "" (
set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
)
if "%1" == "" goto help
if "%1" == "help" (
:help
echo.Please use `make ^<target^>` where ^<target^> is one of
echo. html to make standalone HTML files
echo. dirhtml to make HTML files named index.html in directories
echo. singlehtml to make a single large HTML file
echo. pickle to make pickle files
echo. json to make JSON files
echo. htmlhelp to make HTML files and a HTML help project
echo. qthelp to make HTML files and a qthelp project
echo. devhelp to make HTML files and a Devhelp project
echo. epub to make an epub
echo. epub3 to make an epub3
echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
echo. text to make text files
echo. man to make manual pages
echo. texinfo to make Texinfo files
echo. gettext to make PO message catalogs
echo. changes to make an overview over all changed/added/deprecated items
echo. xml to make Docutils-native XML files
echo. pseudoxml to make pseudoxml-XML files for display purposes
echo. linkcheck to check all external links for integrity
echo. doctest to run all doctests embedded in the documentation if enabled
echo. coverage to run coverage check of the documentation if enabled
echo. dummy to check syntax errors of document sources
goto end
)
if "%1" == "clean" (
for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
del /q /s %BUILDDIR%\*
goto end
)
REM Check if sphinx-build is available and fallback to Python version if any
%SPHINXBUILD% 1>NUL 2>NUL
if errorlevel 9009 goto sphinx_python
goto sphinx_ok
:sphinx_python
REM sphinx-build was not found on PATH: fall back to running Sphinx as a
REM Python module. "python -m sphinx" is the supported module entry point.
set SPHINXBUILD=python -m sphinx
%SPHINXBUILD% 2> nul
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)
:sphinx_ok
if "%1" == "html" (
%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/html.
goto end
)
if "%1" == "dirhtml" (
%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
goto end
)
if "%1" == "singlehtml" (
%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
goto end
)
if "%1" == "pickle" (
%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the pickle files.
goto end
)
if "%1" == "json" (
%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the JSON files.
goto end
)
if "%1" == "htmlhelp" (
%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can run HTML Help Workshop with the ^
.hhp project file in %BUILDDIR%/htmlhelp.
goto end
)
REM Build the Qt help project and print the follow-up commands.
if "%1" == "qthelp" (
	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\featuretools.qhcp
	echo.To view the help file:
	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\featuretools.qhc
	goto end
)
if "%1" == "devhelp" (
%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished.
goto end
)
if "%1" == "epub" (
%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The epub file is in %BUILDDIR%/epub.
goto end
)
if "%1" == "epub3" (
%SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The epub3 file is in %BUILDDIR%/epub3.
goto end
)
if "%1" == "latex" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
if errorlevel 1 exit /b 1
echo.
echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
goto end
)
if "%1" == "latexpdf" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
cd %BUILDDIR%/latex
make all-pdf
cd %~dp0
echo.
echo.Build finished; the PDF files are in %BUILDDIR%/latex.
goto end
)
if "%1" == "latexpdfja" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
cd %BUILDDIR%/latex
make all-pdf-ja
cd %~dp0
echo.
echo.Build finished; the PDF files are in %BUILDDIR%/latex.
goto end
)
if "%1" == "text" (
%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The text files are in %BUILDDIR%/text.
goto end
)
if "%1" == "man" (
%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The manual pages are in %BUILDDIR%/man.
goto end
)
if "%1" == "texinfo" (
%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
goto end
)
if "%1" == "gettext" (
%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
goto end
)
if "%1" == "changes" (
%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
if errorlevel 1 exit /b 1
echo.
echo.The overview file is in %BUILDDIR%/changes.
goto end
)
if "%1" == "linkcheck" (
%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
if errorlevel 1 exit /b 1
echo.
echo.Link check complete; look for any errors in the above output ^
or in %BUILDDIR%/linkcheck/output.txt.
goto end
)
if "%1" == "doctest" (
%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
if errorlevel 1 exit /b 1
echo.
echo.Testing of doctests in the sources finished, look at the ^
results in %BUILDDIR%/doctest/output.txt.
goto end
)
if "%1" == "coverage" (
%SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage
if errorlevel 1 exit /b 1
echo.
echo.Testing of coverage in the sources finished, look at the ^
results in %BUILDDIR%/coverage/python.txt.
goto end
)
if "%1" == "xml" (
%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The XML files are in %BUILDDIR%/xml.
goto end
)
if "%1" == "pseudoxml" (
%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
goto end
)
if "%1" == "dummy" (
%SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy
if errorlevel 1 exit /b 1
echo.
echo.Build finished. Dummy builder generates no files.
goto end
)
:end
================================================
FILE: docs/notebook_version_standardizer.py
================================================
import json
import os
import click
DOCS_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "source")
def _get_ipython_notebooks(docs_source):
directories_to_skip = ["_templates", "generated", ".ipynb_checkpoints"]
notebooks = []
for root, _, filenames in os.walk(docs_source):
if any(dir_ in root for dir_ in directories_to_skip):
continue
for filename in filenames:
if filename.endswith(".ipynb"):
notebooks.append(os.path.join(root, filename))
return notebooks
def _check_delete_empty_cell(notebook, delete=True):
with open(notebook, "r") as f:
source = json.load(f)
cell = source["cells"][-1]
if cell["cell_type"] == "code" and cell["source"] == []:
# this is an empty cell, which we should delete
if delete:
source["cells"] = source["cells"][:-1]
else:
return False
if delete:
with open(notebook, "w") as f:
json.dump(source, f, ensure_ascii=False, indent=1)
else:
return True
def _check_execution_and_output(notebook):
with open(notebook, "r") as f:
source = json.load(f)
for cells in source["cells"]:
if cells["cell_type"] == "code" and (
cells["execution_count"] is not None or cells["outputs"] != []
):
return False
return True
def _check_python_version(notebook, default_version):
with open(notebook, "r") as f:
source = json.load(f)
if source["metadata"]["language_info"]["version"] != default_version:
return False
return True
def _fix_python_version(notebook, default_version):
with open(notebook, "r") as f:
source = json.load(f)
source["metadata"]["language_info"]["version"] = default_version
with open(notebook, "w") as f:
json.dump(source, f, ensure_ascii=False, indent=1)
def _fix_execution_and_output(notebook):
with open(notebook, "r") as f:
source = json.load(f)
for cells in source["cells"]:
if cells["cell_type"] == "code" and cells["execution_count"] is not None:
cells["execution_count"] = None
cells["outputs"] = []
source["metadata"]["kernelspec"]["display_name"] = "Python 3"
source["metadata"]["kernelspec"]["name"] = "python3"
with open(notebook, "w") as f:
json.dump(source, f, ensure_ascii=False, indent=1)
def _get_notebooks_with_executions_and_empty(notebooks, default_version="3.9.2"):
    """Sort notebooks into the three kinds of standardization failure.

    Returns a tuple of three lists, in this order: notebooks failing
    ``_check_execution_and_output``, notebooks failing
    ``_check_delete_empty_cell`` (called with ``delete=False``), and notebooks
    failing ``_check_python_version`` against ``default_version``.
    """
    with_outputs = []
    with_trailing_empty_cell = []
    with_wrong_version = []
    # Each bucket is paired with the predicate a clean notebook satisfies.
    checks = (
        (with_outputs, lambda nb: _check_execution_and_output(nb)),
        (with_trailing_empty_cell, lambda nb: _check_delete_empty_cell(nb, delete=False)),
        (with_wrong_version, lambda nb: _check_python_version(nb, default_version)),
    )
    for path in notebooks:
        for bucket, passes in checks:
            if not passes(path):
                bucket.append(path)
    return (with_outputs, with_trailing_empty_cell, with_wrong_version)
def _fix_versions(notebooks, default_version="3.9.2"):
    """Apply ``_fix_python_version`` with ``default_version`` to each notebook."""
    for path in notebooks:
        _fix_python_version(path, default_version)
def _remove_notebook_empty_last_cell(notebooks):
    """Call ``_check_delete_empty_cell`` in delete mode on each notebook."""
    for path in notebooks:
        _check_delete_empty_cell(path, delete=True)
def _standardize_outputs(notebooks):
    """Apply ``_fix_execution_and_output`` to each notebook."""
    for path in notebooks:
        _fix_execution_and_output(path)
# Root click command group for this script; the `standardize` and
# `check_execution` functions below are registered on it via @cli.command().
@click.group()
def cli():
    """no-op"""
# Fix every notebook under DOCS_PATH that fails a standardization check:
# clear outputs, drop an empty trailing cell, and reset the Python version.
# Each step echoes the list of notebooks it touched.
@cli.command()
def standardize():
    notebooks = _get_ipython_notebooks(DOCS_PATH)
    executed_notebooks, empty_cells, versions = (
        _get_notebooks_with_executions_and_empty(notebooks)
    )

    if executed_notebooks:
        _standardize_outputs(executed_notebooks)
        executed_notebooks = "\n".join("\t" + path for path in executed_notebooks)
        click.echo(f"Removed the outputs for:\n {executed_notebooks}")

    if empty_cells:
        _remove_notebook_empty_last_cell(empty_cells)
        empty_cells = "\n".join("\t" + path for path in empty_cells)
        click.echo(f"Removed the empty cells for:\n {empty_cells}")

    if versions:
        _fix_versions(versions)
        versions = "\n".join("\t" + path for path in versions)
        click.echo(f"Fixed python versions for:\n {versions}")
# Verify every notebook under DOCS_PATH without modifying anything. Exits via
# SystemExit on the first failing category, checked in this order: executed
# outputs, empty trailing cells, wrong Python version.
@cli.command()
def check_execution():
    notebooks = _get_ipython_notebooks(DOCS_PATH)
    executed_notebooks, empty_cells, versions = (
        _get_notebooks_with_executions_and_empty(notebooks)
    )

    def _indented(paths):
        # One tab-prefixed path per line.
        return "\n".join("\t" + path for path in paths)

    if executed_notebooks:
        executed_notebooks = _indented(executed_notebooks)
        raise SystemExit(
            f"The following notebooks have executed outputs:\n {executed_notebooks}\n"
            "Please run make lint-fix to fix this.",
        )

    if empty_cells:
        empty_cells = _indented(empty_cells)
        raise SystemExit(
            f"The following notebooks have empty cells at the end:\n {empty_cells}\n"
            "Please run make lint-fix to fix this.",
        )

    if versions:
        versions = _indented(versions)
        raise SystemExit(
            f"The following notebooks have the wrong Python version: \n {versions}\n"
            "Please run make lint-fix to fix this.",
        )
if __name__ == "__main__":
cli()
================================================
FILE: docs/pull_request_template.md
================================================
### Pull Request Description
(replace this text with your description)
-----
*After creating the pull request: in order to pass the **release_notes_updated** check you will need to update the "Future Release" section of* `docs/source/release_notes.rst` *to include this pull request.*
================================================
FILE: docs/source/_static/style.css
================================================
.footer {
background-color: #0D2345;
padding-bottom: 40px;
padding-top: 40px;
width: 100%;
}
.footer-cell-1 {
grid-row: 1;
grid-column: 1 / 3;
}
.footer-cell-2 {
grid-row: 1;
grid-column: 4;
margin-bottom: 15px;
text-align: right;
}
.footer-cell-3 {
grid-row: 2;
grid-column: 1 / 5;
}
.footer-cell-4 {
grid-row: 3;
grid-column: 1 / 3;
}
.footer-container {
display: grid;
margin-left: 10%;
margin-right: 10%;
}
.footer-image-alteryx {
padding-top: 22px;
width: 270px;
}
.footer-image-copyright {
width: 180px;
}
.footer-image-github {
width: 50px;
}
.footer-image-twitter {
width: 60px;
}
.footer-line {
border-top: 2px solid white;
margin-left: 7px;
margin-right: 15px;
}
================================================
FILE: docs/source/api_reference.rst
================================================
.. _api_ref:
API Reference
=============
.. currentmodule:: featuretools
Demo Datasets
~~~~~~~~~~~~~
.. currentmodule:: featuretools.demo
.. autosummary::
:toctree: generated/
load_retail
load_mock_customer
load_flight
load_weather
Deep Feature Synthesis
~~~~~~~~~~~~~~~~~~~~~~
.. currentmodule:: featuretools
.. autosummary::
:toctree: generated/
dfs
get_valid_primitives
Timedelta
~~~~~~~~~
.. currentmodule:: featuretools
.. autosummary::
:toctree: generated/
Timedelta
Time utils
~~~~~~~~~~
.. currentmodule:: featuretools
.. autosummary::
:toctree: generated/
make_temporal_cutoffs
Feature Primitives
~~~~~~~~~~~~~~~~~~
Primitive Types
---------------
.. currentmodule:: featuretools.primitives
.. autosummary::
:toctree: generated/
TransformPrimitive
AggregationPrimitive
.. _api_ref.aggregation_features:
Aggregation Primitives
----------------------
.. autosummary::
:toctree: generated/
All
Any
AverageCountPerUnique
AvgTimeBetween
Count
CountAboveMean
CountBelowMean
CountGreaterThan
CountInsideNthSTD
CountInsideRange
CountLessThan
CountOutsideNthSTD
CountOutsideRange
DateFirstEvent
Entropy
First
FirstLastTimeDelta
HasNoDuplicates
IsMonotonicallyDecreasing
IsMonotonicallyIncreasing
IsUnique
Kurtosis
Last
Max
MaxConsecutiveFalse
MaxConsecutiveNegatives
MaxConsecutivePositives
MaxConsecutiveTrue
MaxConsecutiveZeros
MaxCount
MaxMinDelta
Mean
Median
MedianCount
Min
MinCount
Mode
NMostCommon
NMostCommonFrequency
NUniqueDays
NUniqueDaysOfCalendarYear
NUniqueMonths
NUniqueWeeks
NumConsecutiveGreaterMean
NumConsecutiveLessMean
NumFalseSinceLastTrue
NumPeaks
NumTrue
NumTrueSinceLastFalse
NumUnique
NumZeroCrossings
PercentTrue
PercentUnique
Skew
Std
Sum
TimeSinceFirst
TimeSinceLast
TimeSinceLastFalse
TimeSinceLastMax
TimeSinceLastMin
TimeSinceLastTrue
Trend
Variance
Transform Primitives
--------------------
Binary Transform Primitives
***************************
.. autosummary::
:toctree: generated/
AddNumeric
AddNumericScalar
DivideByFeature
DivideNumeric
DivideNumericScalar
Equal
EqualScalar
GreaterThan
GreaterThanEqualTo
GreaterThanEqualToScalar
GreaterThanScalar
LessThan
LessThanEqualTo
LessThanEqualToScalar
LessThanScalar
ModuloByFeature
ModuloNumeric
ModuloNumericScalar
MultiplyBoolean
MultiplyNumeric
MultiplyNumericBoolean
MultiplyNumericScalar
NotEqual
NotEqualScalar
ScalarSubtractNumericFeature
SubtractNumeric
SubtractNumericScalar
Combine features
****************
.. autosummary::
:toctree: generated/
IsIn
And
Or
Not
.. _api_ref.cumulative_features:
Cumulative Transform Primitives
*******************************
.. autosummary::
:toctree: generated/
Diff
DiffDatetime
TimeSincePrevious
CumCount
CumSum
CumMean
CumMin
CumMax
CumulativeTimeSinceLastFalse
CumulativeTimeSinceLastTrue
Datetime Transform Primitives
*****************************
.. autosummary::
:toctree: generated/
Age
DateToHoliday
DateToTimeZone
Day
DayOfYear
DaysInMonth
DistanceToHoliday
Hour
IsFederalHoliday
IsFirstWeekOfMonth
IsLeapYear
IsLunchTime
IsMonthEnd
IsMonthStart
IsQuarterEnd
IsQuarterStart
IsWeekend
IsWorkingHours
IsYearEnd
IsYearStart
Minute
Month
NthWeekOfMonth
PartOfDay
Quarter
Season
Second
TimeSince
Week
Weekday
Year
Email, URL and File Transform Primitives
****************************************
.. autosummary::
:toctree: generated/
EmailAddressToDomain
FileExtension
IsFreeEmailDomain
URLToDomain
URLToProtocol
URLToTLD
Exponential Transform Primitives
********************************
.. autosummary::
:toctree: generated/
ExponentialWeightedAverage
ExponentialWeightedSTD
ExponentialWeightedVariance
General Transform Primitives
****************************
.. autosummary::
:toctree: generated/
AbsoluteDiff
Absolute
Cosine
IsNull
NaturalLogarithm
Negate
Percentile
PercentChange
RateOfChange
SameAsPrevious
SavgolFilter
Sine
SquareRoot
Tangent
Variance
Location Transform Primitives
*****************************
.. autosummary::
:toctree: generated/
CityblockDistance
GeoMidpoint
Haversine
IsInGeoBox
Latitude
Longitude
Name Transform Primitives
*************************
.. autosummary::
:toctree: generated/
FullNameToFirstName
FullNameToLastName
FullNameToTitle
NaturalLanguage Transform Primitives
************************************
.. autosummary::
:toctree: generated/
CountString
MeanCharactersPerWord
MedianWordLength
NumCharacters
NumUniqueSeparators
NumWords
NumberOfCommonWords
NumberOfHashtags
NumberOfMentions
NumberOfUniqueWords
NumberOfWordsInQuotes
PunctuationCount
TitleWordCount
TotalWordLength
UpperCaseCount
UpperCaseWordCount
WhitespaceCount
Postal Code Primitives
**********************
.. autosummary::
:toctree: generated/
OneDigitPostalCode
TwoDigitPostalCode
Time Series Transform Primitives
********************************
.. autosummary::
:toctree: generated/
ExpandingCount
ExpandingMax
ExpandingMean
ExpandingMin
ExpandingSTD
ExpandingTrend
Lag
RollingCount
RollingMax
RollingMean
RollingMin
RollingOutlierCount
RollingSTD
RollingTrend
Feature methods
---------------
.. currentmodule:: featuretools.feature_base
.. autosummary::
:toctree: generated/
FeatureBase.rename
FeatureBase.get_depth
Feature calculation
~~~~~~~~~~~~~~~~~~~~
.. currentmodule:: featuretools
.. autosummary::
:toctree: generated/
calculate_feature_matrix
.. approximate_features
Feature descriptions
~~~~~~~~~~~~~~~~~~~~~
.. currentmodule:: featuretools
.. autosummary::
:toctree: generated/
describe_feature
Feature visualization
~~~~~~~~~~~~~~~~~~~~~~
.. currentmodule:: featuretools
.. autosummary::
:toctree: generated/
graph_feature
Feature encoding
~~~~~~~~~~~~~~~~~
.. currentmodule:: featuretools
.. autosummary::
:toctree: generated/
encode_features
Feature Selection
~~~~~~~~~~~~~~~~~
.. currentmodule:: featuretools.selection
.. autosummary::
:toctree: generated/
remove_low_information_features
remove_highly_correlated_features
remove_highly_null_features
remove_single_value_features
Feature Matrix utils
~~~~~~~~~~~~~~~~~~~~
.. currentmodule:: featuretools.computational_backends
.. autosummary::
:toctree: generated/
replace_inf_values
Saving and Loading Features
~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. currentmodule:: featuretools
.. autosummary::
:toctree: generated/
save_features
load_features
.. _api_ref.dataset:
EntitySet, Relationship
~~~~~~~~~~~~~~~~~~~~~~~
Constructors
------------
.. currentmodule:: featuretools
.. autosummary::
:toctree: generated/
EntitySet
Relationship
EntitySet load and prepare data
-------------------------------
.. autosummary::
:toctree: generated/
EntitySet.add_dataframe
EntitySet.add_interesting_values
EntitySet.add_last_time_indexes
EntitySet.add_relationship
EntitySet.add_relationships
EntitySet.concat
EntitySet.normalize_dataframe
EntitySet.set_secondary_time_index
EntitySet.replace_dataframe
EntitySet serialization
-------------------------------
.. currentmodule:: featuretools
.. autosummary::
:toctree: generated/
read_entityset
.. currentmodule:: featuretools.entityset
.. autosummary::
:toctree: generated/
EntitySet.to_csv
EntitySet.to_pickle
EntitySet.to_parquet
EntitySet query methods
-----------------------
.. autosummary::
:toctree: generated/
EntitySet.__getitem__
EntitySet.find_backward_paths
EntitySet.find_forward_paths
EntitySet.get_forward_dataframes
EntitySet.get_backward_dataframes
EntitySet.query_by_values
EntitySet visualization
-----------------------
.. autosummary::
:toctree: generated/
EntitySet.plot
Relationship attributes
-----------------------
.. autosummary::
:toctree: generated/
Relationship.parent_column
Relationship.child_column
Relationship.parent_dataframe
Relationship.child_dataframe
Data Type Util Methods
----------------------
.. currentmodule:: featuretools
.. autosummary::
:toctree: generated/
list_logical_types
list_semantic_tags
Primitive Util Methods
----------------------
.. currentmodule:: featuretools
.. autosummary::
:toctree: generated/
get_recommended_primitives
list_primitives
summarize_primitives
================================================
FILE: docs/source/conf.py
================================================
# -*- coding: utf-8 -*-
#
# featuretools documentation build configuration file, created by
# sphinx-quickstart on Thu May 19 20:40:30 2016.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import os
import shutil
import subprocess
import sys
from pathlib import Path
import featuretools
# Execute the helper script that sits next to this file (setup.py in the same
# directory) with the interpreter that is running Sphinx. check_call raises
# CalledProcessError on a non-zero exit status, which stops the docs build.
path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "setup.py",
)
subprocess.check_call([sys.executable, path])

# Put the package directory at the front of sys.path so autodoc can import the
# modules it documents. The relative path is made absolute against the current
# working directory at the time this file is executed.
sys.path.insert(0, os.path.abspath("../featuretools"))
# -- General configuration ------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
# needs_sphinx = '1.0'
# Sphinx extensions to load, given as importable module names. Entries are
# either bundled with Sphinx ("sphinx.ext.*") or provided by third-party
# packages. The order below is kept exactly as configured.
extensions = [
    # API documentation generated from docstrings
    "sphinx.ext.autodoc",
    "sphinx.ext.autosummary",
    "sphinx.ext.napoleon",
    "sphinx.ext.ifconfig",
    "sphinx.ext.githubpages",
    # Notebook rendering and IPython directives
    "nbsphinx",
    "IPython.sphinxext.ipython_console_highlighting",
    "IPython.sphinxext.ipython_directive",
    # Linking, source viewing and diagrams
    "sphinx.ext.extlinks",
    "sphinx.ext.viewcode",
    "sphinx.ext.graphviz",
    # Presentation helpers and Markdown support
    "sphinx_inline_tabs",
    "sphinx_copybutton",
    "myst_parser",
]
# ipython_mplbackend = None
# Statements run at the start of every IPython directive session: import pandas
# and widen its display so printed frames are not wrapped.
ipython_execlines = [
    "import pandas as pd",
    "pd.set_option('display.width', 1000000)",
]

# autosummary_generate=True
# Generate autosummary stub pages only for the documents listed here.
autosummary_generate = ["api_reference.rst"]

# Directories containing templates, relative to this directory.
templates_path = ["templates"]
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
# source_suffix = ['.rst', '.md']
# The encoding of source files.
# source_encoding = 'utf-8-sig'
# The master toctree document.
master_doc = "index"

# General information about the project.
project = "Featuretools"
copyright = "2019, Feature Labs. BSD License"
author = "Feature Labs, Inc."

# NOTE: `latex_documents` and `latex_elements` used to be assigned here too
# (a placeholder "test Documentation" entry and a utf8 `inputenc` preamble).
# Both names are unconditionally reassigned in the "Options for LaTeX output"
# section further down this file, so those values never took effect. They were
# removed as dead code; configure LaTeX output in that later section.
# Version information used for the |version| and |release| substitutions and
# in other places throughout the built documents. The short X.Y version and
# the full version (including alpha/beta/rc tags) are both taken directly from
# the imported package.
version = release = featuretools.__version__

# Language of the content autogenerated by Sphinx (see the Sphinx docs for the
# supported values). It is also used for content translation via gettext
# catalogs, in which case it is usually set from the command line instead.
language = "en"
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
# today = ''
# Else, today_fmt is used as the format for a strftime call.
# today_fmt = '%B %d, %Y'
# List of patterns, relative to the source directory, that match files and
# directories to ignore when looking for source files.
# These patterns also affect html_static_path and html_extra_path.
exclude_patterns = ["**.ipynb_checkpoints"]
# The reST default role (used for this markup: `text`) to use for all
# documents.
# default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
# add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
# add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
# show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"
# A list of ignored prefixes for module index sorting.
# modindex_common_prefix = []
# If true, keep warnings as "system message" paragraphs in the built documents.
# keep_warnings = False
# If true, `todo` and `todoList` produce output, else they produce nothing.
# NOTE(review): "sphinx.ext.todo" is not in the `extensions` list in this file,
# so this setting presumably has no effect unless that extension is enabled.
todo_include_todos = False
# -- Options for HTML output ----------------------------------------------

# Theme used for the HTML and HTML Help pages.
html_theme = "pydata_sphinx_theme"

# Theme-specific options: Pygments styles for light/dark mode, the icon links
# shown in the header, and sidebar navigation behaviour. Every icon link uses
# the "fontawesome" icon type, so the entries are generated from
# (name, url, icon) triples.
html_theme_options = {
    "pygment_light_style": "tango",
    "pygment_dark_style": "native",
    "icon_links": [
        {"name": label, "url": target, "icon": glyph, "type": "fontawesome"}
        for label, target, glyph in (
            (
                "GitHub",
                "https://github.com/alteryx/featuretools",
                "fab fa-github-square",
            ),
            (
                "Twitter",
                "https://twitter.com/AlteryxOSS",
                "fab fa-twitter-square",
            ),
            (
                "Slack",
                "https://join.slack.com/t/alteryx-oss/shared_invite/zt-182tyvuxv-NzIn6eiCEf8TBziuKp0bNA",
                "fab fa-slack",
            ),
            (
                "StackOverflow",
                "https://stackoverflow.com/questions/tagged/featuretools",
                "fab fa-stack-overflow",
            ),
        )
    ],
    "collapse_navigation": False,
    "navigation_depth": 2,
}
# Add any paths that contain custom themes here, relative to this directory.
# html_theme_path = []
# The name for this set of Sphinx documents.
# "<project> v<release> documentation" by default.
# html_title = u'featuretools v0.1'
# A shorter title for the navigation bar. Default is the same as html_title.
# html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
html_logo = "_static/images/featuretools_nav2.svg"
# The name of an image file (relative to this directory) to use as the favicon
# of the docs. This file should be a Windows icon file (.ico) that is 16x16 or
# 32x32 pixels large.
html_favicon = "_static/images/favicon.ico"
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]
# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
# html_extra_path = []
# If not None, a 'Last updated on:' timestamp is inserted at every page
# bottom, using the given strftime format.
# The empty string is equivalent to '%b %d, %Y'.
# html_last_updated_fmt = None
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
# html_use_smartypants = True
# Custom sidebar templates: maps a document-name pattern to the list of
# templates rendered in its sidebar. The single "**" pattern is the only entry.
html_sidebars = {
    "**": [
        "globaltoc.html",
        "relations.html",
        "sourcelink.html",
        "searchbox.html",
    ],
}
# Additional templates that should be rendered to pages, maps page names to
# template names.
# html_additional_pages = {}
# If false, no module index is generated.
# html_domain_indices = True
# If false, no index is generated.
# html_use_index = True
# If true, the index is split into individual pages for each letter.
# html_split_index = False
# If true, links to the reST sources are added to the pages.
# html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
# Set to False here, so that line is left out of the footer.
html_show_sphinx = False
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
# html_show_copyright = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
# html_use_opensearch = ''
# This is the file name suffix for HTML files (e.g. ".xhtml").
# html_file_suffix = None
# Language to be used for generating the HTML full-text search index.
# Sphinx supports the following languages:
# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
# html_search_language = 'en'
# A dictionary with options for the search language support, empty by default.
# 'ja' uses this config value.
# 'zh' user can custom change `jieba` dictionary path.
# html_search_options = {'type': 'default'}
# The name of a javascript file (relative to the configuration directory) that
# implements a search results scorer. If empty, the default will be used.
# html_search_scorer = 'scorer.js'
# Output file base name for the HTML help builder.
htmlhelp_basename = "featuretoolsdoc"
# -- Options for Markdown files ----------------------------------------------
myst_admonition_enable = True
myst_deflist_enable = True
# Generate anchors for Markdown headings down to this level.
myst_heading_anchors = 3

# -- Options for Sphinx Copy Button ------------------------------------------
# Regular expression matching the prompts that the copy button strips from
# copied code: Python (">>> ", "... "), shell ("$ ") and IPython ("In [n]: "
# plus its continuation lines). The placeholder value "myinputprompt" that used
# to be assigned first was overwritten immediately and has been dropped.
copybutton_prompt_text = r">>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: "
# The prompt text above must be interpreted as a regex, not a literal string.
copybutton_prompt_is_regexp = True
# -- Options for LaTeX output ---------------------------------------------

# No LaTeX element is customised. Keys that can be set here include:
#   'papersize':    the paper size ('letterpaper' or 'a4paper'), e.g. 'letterpaper'
#   'pointsize':    the font size ('10pt', '11pt' or '12pt'), e.g. '10pt'
#   'preamble':     additional stuff for the LaTeX preamble, e.g. ''
#   'figure_align': LaTeX figure (float) alignment, e.g. 'htbp'
latex_elements = {}

# Grouping of the document tree into LaTeX files. Each entry is a tuple of
# (source start file, target name, title, author,
#  documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, "featuretools.tex", "Featuretools Documentation", "Feature Labs, Inc.", "manual"),
]
# The name of an image file (relative to this directory) to place at the top of
# the title page.
# latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
# latex_use_parts = False
# If true, show page references after internal links.
# latex_show_pagerefs = False
# If true, show URL addresses after external links.
# latex_show_urls = False
# Documents to append as an appendix to all manuals.
# latex_appendices = []
# If false, no module index is generated.
# latex_domain_indices = True
# -- Options for manual page output ---------------------------------------

# One entry per manual page, each a tuple of
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, "featuretools", "featuretools Documentation", [author], 1),
]

# If true, show URL addresses after external links.
# man_show_urls = False

# -- Options for Texinfo output -------------------------------------------

# Grouping of the document tree into Texinfo files. Each entry is a tuple of
# (source start file, target name, title, author,
#  dir menu entry, description, category).
texinfo_documents = [
    (
        master_doc,  # source start file
        "featuretools",  # target name
        "featuretools Documentation",  # title
        author,  # author
        "featuretools",  # dir menu entry
        "One line description of project.",  # description
        "Miscellaneous",  # category
    ),
]
# Documents to append as an appendix to all manuals.
# texinfo_appendices = []
# If false, no module index is generated.
# texinfo_domain_indices = True
# How to display URL addresses: 'footnote', 'no', or 'inline'.
# texinfo_show_urls = 'footnote'
# If true, do not generate a @detailmenu in the "Top" node's menu.
# texinfo_no_detailmenu = False
# Notebook execution mode for nbsphinx.
nbsphinx_execute = "auto"

# Short-hand roles for GitHub links: each role name maps to a
# (URL template, caption template) pair, with "%s" replaced by the role text.
extlinks = dict(
    issue=("https://github.com/alteryx/featuretools/issues/%s", "GH#%s"),
    pr=("https://github.com/alteryx/featuretools/pull/%s", "GH#%s"),
    user=("https://github.com/%s", "@%s"),
)

# Napoleon settings
# Docstring flavours that are parsed.
napoleon_google_docstring = True
napoleon_numpy_docstring = True
# Which otherwise-skipped members are included when they have docstrings.
napoleon_include_init_with_doc = False
napoleon_include_private_with_doc = False
napoleon_include_special_with_doc = True
# Rendering choices for docstring sections.
napoleon_use_admonition_for_examples = False
napoleon_use_admonition_for_notes = False
napoleon_use_admonition_for_references = False
napoleon_use_ivar = False
napoleon_use_param = True
napoleon_use_rtype = True
def setup(app):
    """Install the IPython startup script and register the docs stylesheet.

    Copies ``set-headers.py`` from this file's directory into
    ``$HOME/.ipython/profile_default/startup``, creating that directory (and
    any missing parents) first. ``HOME`` falls back to ``"/"`` when the
    environment variable is not set. Finally ``style.css`` is registered
    through ``app.add_css_file``.

    Args:
        app: Object handed to this hook; only its ``add_css_file`` method is
            used here.
    """
    home_dir = os.environ.get("HOME", "/")
    # Built once as a plain string so mkdir and copy target the same location.
    startup_dir = home_dir + "/.ipython/profile_default/startup"
    Path(startup_dir).mkdir(parents=True, exist_ok=True)

    conf_dir = os.path.abspath(os.path.dirname(__file__))
    shutil.copy(f"{conf_dir}/set-headers.py", startup_dir)

    app.add_css_file("style.css")
================================================
FILE: docs/source/getting_started/afe.ipynb
================================================
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Deep Feature Synthesis\n",
"\n",
"Deep Feature Synthesis (DFS) is an automated method for performing feature engineering on relational and temporal data.\n",
"\n",
"## Input Data\n",
"\n",
"Deep Feature Synthesis requires structured datasets in order to perform feature engineering. To demonstrate the capabilities of DFS, we will use a mock customer transactions dataset.\n"
]
},
{
"cell_type": "raw",
"metadata": {
"raw_mimetype": "text/restructuredtext"
},
"source": [
".. note ::\n",
"\n",
" Before using DFS, it is recommended that you prepare your data as an :class:`EntitySet`. See :doc:`using_entitysets` to learn how."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import featuretools as ft\n",
"\n",
"es = ft.demo.load_mock_customer(return_entityset=True)\n",
"es"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Once data is prepared as an `.EntitySet`, we are ready to automatically generate features for a target dataframe - e.g. `customers`.\n",
"\n",
"## Running DFS\n",
"\n",
"Typically, without automated feature engineering, a data scientist would write code to aggregate data for a customer, and apply different statistical functions resulting in features quantifying the customer's behavior. In this example, an expert might be interested in features such as: *total number of sessions* or *month the customer signed up*.\n",
"\n",
"These features can be generated by DFS when we specify the target_dataframe as `customers` and `\"count\"` and `\"month\"` as primitives."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"feature_matrix, feature_defs = ft.dfs(\n",
" entityset=es,\n",
" target_dataframe_name=\"customers\",\n",
" agg_primitives=[\"count\"],\n",
" trans_primitives=[\"month\"],\n",
" max_depth=1,\n",
")\n",
"feature_matrix"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In the example above, `\"count\"` is an **aggregation primitive** because it computes a single value based on many sessions related to one customer. `\"month\"` is called a **transform primitive** because it takes one value for a customer transforms it to another."
]
},
{
"cell_type": "raw",
"metadata": {
"raw_mimetype": "text/restructuredtext"
},
"source": [
".. note ::\n",
"\n",
" Feature primitives are a fundamental component to Featuretools. To learn more read :doc:`primitives`."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Creating \"Deep Features\"\n",
"\n",
"The name Deep Feature Synthesis comes from the algorithm's ability to stack primitives to generate more complex features. Each time we stack a primitive we increase the \"depth\" of a feature. The `max_depth` parameter controls the maximum depth of the features returned by DFS. Let us try running DFS with `max_depth=2`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"feature_matrix, feature_defs = ft.dfs(\n",
" entityset=es,\n",
" target_dataframe_name=\"customers\",\n",
" agg_primitives=[\"mean\", \"sum\", \"mode\"],\n",
" trans_primitives=[\"month\", \"hour\"],\n",
" max_depth=2,\n",
")\n",
"feature_matrix"
]
},
{
"cell_type": "markdown",
"metadata": {
"raw_mimetype": "text/markdown"
},
"source": [
"With a depth of 2, a number of features are generated using the supplied primitives. The algorithm to synthesize these definitions is described in this [paper](https://www.jmaxkanter.com/papers/DSAA_DSM_2015.pdf). In the returned feature matrix, let us understand one of the depth 2 features"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"feature_matrix[[\"MEAN(sessions.SUM(transactions.amount))\"]]"
]
},
{
"cell_type": "markdown",
"metadata": {
"raw_mimetype": "text/restructuredtext"
},
"source": [
"For each customer this feature\n",
"\n",
"1. calculates the ``sum`` of all transaction amounts per session to get total amount per session,\n",
"2. then applies the ``mean`` to the total amounts across multiple sessions to identify the *average amount spent per session*\n",
"\n",
"We call this feature a \"deep feature\" with a depth of 2.\n",
"\n",
"Let's look at another depth 2 feature that calculates for every customer *the most common hour of the day when they start a session*"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"feature_matrix[[\"MODE(sessions.HOUR(session_start))\"]]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For each customer this feature calculates\n",
"\n",
"1. The `hour` of the day each of his or her sessions started, then\n",
"2. uses the statistical function `mode` to identify the most common hour he or she started a session\n",
"\n",
"Stacking results in features that are more expressive than individual primitives themselves. This enables the automatic creation of complex patterns for machine learning."
]
},
{
"cell_type": "raw",
"metadata": {
"raw_mimetype": "text/restructuredtext"
},
"source": [
".. note ::\n",
" You can graphically visualize the lineage of a feature by calling :func:`featuretools.graph_feature` on it. You can also generate an English description of the feature with :func:`featuretools.describe_feature`. See :doc:`/guides/feature_descriptions` for more details."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Changing Target DataFrame\n",
"\n",
"DFS is powerful because we can create a feature matrix for any dataframe in our dataset. If we switch our target dataframe to \"sessions\", we can synthesize features for each session instead of each customer. Now, we can use these features to predict the outcome of a session."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"feature_matrix, feature_defs = ft.dfs(\n",
" entityset=es,\n",
" target_dataframe_name=\"sessions\",\n",
" agg_primitives=[\"mean\", \"sum\", \"mode\"],\n",
" trans_primitives=[\"month\", \"hour\"],\n",
" max_depth=2,\n",
")\n",
"feature_matrix.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {
"raw_mimetype": "text/restructuredtext"
},
"source": [
"As we can see, DFS will also build deep features based on a parent dataframe, in this case the customer of a particular session. For example, the feature below calculates the mean transaction amount of the customer of the session."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"feature_matrix[[\"customers.MEAN(transactions.amount)\"]].head(5)"
]
},
{
"cell_type": "raw",
"metadata": {
"raw_mimetype": "text/restructuredtext"
},
"source": [
"Improve feature output\n",
"~~~~~~~~~~~~~~~~~~~~~~\n",
"\n",
"To learn about the parameters to change in DFS read :doc:`/guides/tuning_dfs`.\n",
"\n",
"\n",
".. here it maybe nice to have a table that shows the number of features generated for AirBnB and other KAGGLE datasets once we have them. We can also give the user access to it."
]
}
],
"metadata": {
"celltoolbar": "Raw Cell Format",
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
================================================
FILE: docs/source/getting_started/getting_started_index.rst
================================================
Getting Started
---------------
For a quick introduction to Featuretools, check out our :ref:`5 minute quick start guide <quick-start>`.
How to start working with Featuretools; the main concepts:
.. toctree::
:maxdepth: 1
using_entitysets
afe
primitives
woodwork_types
handling_time
================================================
FILE: docs/source/getting_started/handling_time.ipynb
================================================
{
"cells": [
{
"cell_type": "markdown",
"id": "a8104f18",
"metadata": {},
"source": [
"# Handling Time\n",
"\n",
"\n",
"When performing feature engineering with temporal data, carefully selecting the data that is used for any calculation is paramount. By annotating dataframes with a Woodwork **time index** column and providing a **cutoff time** during feature calculation, Featuretools will automatically filter out any data after the cutoff time before running any calculations."
]
},
{
"cell_type": "raw",
"id": "9cd9cb82",
"metadata": {
"raw_mimetype": "text/restructuredtext"
},
"source": [
".. note::\n",
" This guide focuses on performing feature engineering on temporal data, but it is not specific to feature engineering for time series problems, which are their own class of machine learning problems. A guide on **using Featuretools for time series feature engineering** can be found `here <../guides/time_series.ipynb>`_."
]
},
{
"cell_type": "markdown",
"id": "32c2ae4d",
"metadata": {},
"source": [
"## What is the Time Index?\n",
"\n",
"\n",
"The time index is the column in the data that specifies when the data in each row became known. For example, let's examine a table of customer transactions:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ebbcb40b",
"metadata": {
"nbsphinx": "hidden"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"pd.options.display.max_columns = 200"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8202f11a",
"metadata": {},
"outputs": [],
"source": [
"import featuretools as ft\n",
"\n",
"es = ft.demo.load_mock_customer(return_entityset=True, random_seed=0)\n",
"es[\"transactions\"].head()"
]
},
{
"cell_type": "markdown",
"id": "cd26087b",
"metadata": {},
"source": [
"In this table, there is one row for every transaction and a ``transaction_time`` column that specifies when the transaction took place. This means that ``transaction_time`` is the time index because it indicates when the information in each row became known and available for feature calculations. For now, ignore the ``_ft_last_time`` column. That is a featuretools-generated column that will be discussed later on.\n",
"\n",
"However, not every datetime column is a time index. Consider the ``customers`` dataframe:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "87dd0a0d",
"metadata": {},
"outputs": [],
"source": [
"es[\"customers\"]"
]
},
{
"cell_type": "markdown",
"id": "c89d548d",
"metadata": {},
"source": [
"Here, we have two time columns, ``join_date`` and ``birthday``. While either column might be useful for making features, the ``join_date`` should be used as the time index because it indicates when that customer first became available in the dataset."
]
},
{
"cell_type": "raw",
"id": "85b51512",
"metadata": {
"raw_mimetype": "text/restructuredtext"
},
"source": [
".. important::\n",
"\n",
" The **time index** is defined as the first time that any information from a row can be used. If a cutoff time is specified when calculating features, rows that have a later value for the time index are automatically ignored."
]
},
{
"cell_type": "markdown",
"id": "00e3c365",
"metadata": {},
"source": [
"# What is the Cutoff Time?\n",
"The **cutoff_time** specifies the last point in time that a row’s data can be used for a feature calculation. Any data after this point in time will be filtered out before calculating features.\n",
"\n",
"For example, let's consider a dataset of timestamped customer transactions, where we want to predict whether customers ``1``, ``2`` and ``3`` will spend $500 between ``04:00`` on January 1 and the end of the day. When building features for this prediction problem, we need to ensure that no data after ``04:00`` is used in our calculations.\n",
"\n",
"<img src=\"../_static/images/retail_ct.png\" width=\"400\" align=\"center\" alt=\"retail cutoff time diagram\">"
]
},
{
"cell_type": "raw",
"id": "19855e77",
"metadata": {
"raw_mimetype": "text/restructuredtext"
},
"source": [
"We pass the cutoff time to :func:`featuretools.dfs` or :func:`featuretools.calculate_feature_matrix` using the ``cutoff_time`` argument like this:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a0717f7d",
"metadata": {},
"outputs": [],
"source": [
"fm, features = ft.dfs(\n",
" entityset=es,\n",
" target_dataframe_name=\"customers\",\n",
" cutoff_time=pd.Timestamp(\"2014-1-1 04:00\"),\n",
" instance_ids=[1, 2, 3],\n",
" cutoff_time_in_index=True,\n",
")\n",
"fm"
]
},
{
"cell_type": "markdown",
"id": "feafa08d",
"metadata": {},
"source": [
"Even though the entityset contains the complete transaction history for each customer, only data with a time index up to and including the cutoff time was used to calculate the features above.\n",
"\n",
"## Using a Cutoff Time DataFrame\n",
"\n",
"\n",
"Oftentimes, the training examples for machine learning will come from different points in time. To specify a unique cutoff time for each row of the resulting feature matrix, we can pass a dataframe which includes one column for the instance id and another column for the corresponding cutoff time. These columns can be in any order, but they must be named properly. The column with the instance ids must either be named ``instance_id`` or have the same name as the target dataframe ``index``. The column with the cutoff time values must either be named ``time`` or have the same name as the target dataframe ``time_index``.\n",
"\n",
"The column names for the instance ids and the cutoff time values should be unambiguous. Passing a dataframe that contains both a column with the same name as the target dataframe ``index`` and a column named ``instance_id`` will result in an error. Similarly, if the cutoff time dataframe contains both a column with the same name as the target dataframe ``time_index`` and a column named ``time`` an error will be raised."
]
},
{
"cell_type": "raw",
"id": "6ffaffd0",
"metadata": {
"raw_mimetype": "text/restructuredtext"
},
"source": [
".. note::\n",
"\n",
" Only the columns corresponding to the instance ids and the cutoff times are used to calculate features. Any additional columns passed through are appended to the resulting feature matrix. This is typically used to pass through machine learning labels to ensure that they stay aligned with the feature matrix."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fa5cc115",
"metadata": {},
"outputs": [],
"source": [
"cutoff_times = pd.DataFrame()\n",
"cutoff_times[\"customer_id\"] = [1, 2, 3, 1]\n",
"cutoff_times[\"time\"] = pd.to_datetime(\n",
" [\"2014-1-1 04:00\", \"2014-1-1 05:00\", \"2014-1-1 06:00\", \"2014-1-1 08:00\"]\n",
")\n",
"cutoff_times[\"label\"] = [True, True, False, True]\n",
"cutoff_times\n",
"fm, features = ft.dfs(\n",
" entityset=es,\n",
" target_dataframe_name=\"customers\",\n",
" cutoff_time=cutoff_times,\n",
" cutoff_time_in_index=True,\n",
")\n",
"fm"
]
},
{
"cell_type": "markdown",
"id": "6185bb0d",
"metadata": {},
"source": [
"We can now see that every row of the feature matrix is calculated at the corresponding time in the cutoff time dataframe. Because we calculate each row at a different time, it is possible to have a repeat customer. In this case, we calculated the feature vector for customer 1 at both ``04:00`` and ``08:00``.\n",
"\n",
"Training Window\n",
"---------------\n",
"\n",
"By default, all data up to and including the cutoff time is used. We can restrict the amount of historical data that is selected for calculations using a \"training window.\"\n",
"\n",
"Here's an example of using a two hour training window:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e321d463",
"metadata": {},
"outputs": [],
"source": [
"window_fm, window_features = ft.dfs(\n",
" entityset=es,\n",
" target_dataframe_name=\"customers\",\n",
" cutoff_time=cutoff_times,\n",
" cutoff_time_in_index=True,\n",
" training_window=\"2 hour\",\n",
")\n",
"\n",
"window_fm"
]
},
{
"cell_type": "markdown",
"id": "4ee67c4d",
"metadata": {},
"source": [
"We can see that that the counts for the same feature are lower after we shorten the training window:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "93d6b9ae",
"metadata": {},
"outputs": [],
"source": [
"fm[[\"COUNT(transactions)\"]]\n",
"\n",
"window_fm[[\"COUNT(transactions)\"]]"
]
},
{
"cell_type": "markdown",
"id": "ad7c73c4",
"metadata": {},
"source": [
"## Setting a Last Time Index\n",
"\n",
"The training window in Featuretools limits the amount of past data that can be used while calculating a particular feature vector. A row in the dataframe is filtered out if the value of its time index is either before or after the training window. This works for dataframes where a row occurs at a single point in time. However, a row can sometimes exist for a duration.\n",
"\n",
"For example, a customer's session has multiple transactions which can happen at different points in time. If we are trying to count the number of sessions a user has in a given time period, we often want to count all the sessions that had *any* transaction during the training window. To accomplish this, we need to not only know when a session starts, but also when it ends. The last time that an instance appears in the data is stored in the `_ft_last_time` column on the dataframe. We can compare the time index and the last time index of the ``sessions`` dataframe above:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "493c8193",
"metadata": {},
"outputs": [],
"source": [
"last_time_index_col = es[\"sessions\"].ww.metadata.get(\"last_time_index\")\n",
"es[\"sessions\"][[\"session_start\", last_time_index_col]].head()"
]
},
{
"cell_type": "markdown",
"id": "b7f1c5cb",
"metadata": {},
"source": [
"Featuretools can automatically add last time indexes to every DataFrame in an ``Entityset`` by running ``EntitySet.add_last_time_indexes()``. When using a training window, if a `last_time_index has` been set, Featuretools will check to see if the `last_time_index` is after the start of the training window. That, combined with the cutoff time, allows DFS to discover which data is relevant for a given training window.\n",
"\n",
"\n",
"## Excluding data at cutoff times"
]
},
{
"cell_type": "raw",
"id": "b44bee57",
"metadata": {
"raw_mimetype": "text/restructuredtext"
},
"source": [
"The ``cutoff_time`` is the last point in time where data can be used for feature\n",
"calculation. If you don't want to use the data at the cutoff time in feature\n",
"calculation, you can exclude that data by setting ``include_cutoff_time`` to\n",
"``False`` in :func:`featuretools.dfs` or :func:`featuretools.calculate_feature_matrix`.\n",
"If you set it to ``True`` (the default behavior), data from the cutoff time point\n",
"will be used."
]
},
{
"cell_type": "markdown",
"id": "2e92d895",
"metadata": {},
"source": [
"Setting ``include_cutoff_time`` to ``False`` also impacts how data at the edges\n",
"of training windows are included or excluded. Take this slice of data as an example:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "76f9676f",
"metadata": {},
"outputs": [],
"source": [
"df = es[\"transactions\"]\n",
"df[df[\"session_id\"] == 1].head()"
]
},
{
"cell_type": "markdown",
"id": "ce77f6fd",
"metadata": {},
"source": [
"Looking at the data, transactions occur every 65 seconds. To check how ``include_cutoff_time``\n",
"effects training windows, we can calculate features at the time of a transaction\n",
"while using a 65 second training window. This creates a training window with a\n",
"transaction at both endpoints of the window. For this example, we'll find the sum\n",
"of all transactions for session id 1 that are in the training window."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1841d78b",
"metadata": {},
"outputs": [],
"source": [
"from featuretools.primitives import Sum\n",
"\n",
"sum_log = ft.Feature(\n",
" es[\"transactions\"].ww[\"amount\"],\n",
" parent_dataframe_name=\"sessions\",\n",
" primitive=Sum,\n",
")\n",
"cutoff_time = pd.DataFrame(\n",
" {\n",
" \"session_id\": [1],\n",
" \"time\": [\"2014-01-01 00:04:20\"],\n",
" }\n",
").astype({\"time\": \"datetime64[ns]\"})"
]
},
{
"cell_type": "markdown",
"id": "3c15be10",
"metadata": {},
"source": [
"With ``include_cutoff_time=True``, the oldest point in the training window\n",
"(``2014-01-01 00:03:15``) is excluded and the cutoff time point is included. This\n",
"means only transaction 371 is in the training window, so the sum of all transaction\n",
"amounts is 31.54"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f782683a",
"metadata": {},
"outputs": [],
"source": [
"# Case1. include_cutoff_time = True\n",
"actual = ft.calculate_feature_matrix(\n",
" features=[sum_log],\n",
" entityset=es,\n",
" cutoff_time=cutoff_time,\n",
" cutoff_time_in_index=True,\n",
" training_window=\"65 seconds\",\n",
" include_cutoff_time=True,\n",
")\n",
"actual"
]
},
{
"cell_type": "markdown",
"id": "324246db",
"metadata": {},
"source": [
"Whereas with ``include_cutoff_time=False``, the oldest point in the window is\n",
"included and the cutoff time point is excluded. So in this case transaction 116\n",
"is included and transaction 371 is exluded, and the sum is 78.92\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9b63bc68",
"metadata": {},
"outputs": [],
"source": [
"# Case2. include_cutoff_time = False\n",
"actual = ft.calculate_feature_matrix(\n",
" features=[sum_log],\n",
" entityset=es,\n",
" cutoff_time=cutoff_time,\n",
" cutoff_time_in_index=True,\n",
" training_window=\"65 seconds\",\n",
" include_cutoff_time=False,\n",
")\n",
"actual"
]
},
{
"cell_type": "markdown",
"id": "4329314f",
"metadata": {},
"source": [
"Approximating Features by Rounding Cutoff Times\n",
"-----------------------------------------------\n",
"\n",
"For each unique cutoff time, Featuretools must perform operations to select the data that’s valid for computations. If there are a large number of unique cutoff times relative to the number of instances for which we are calculating features, the time spent filtering data can add up. By reducing the number of unique cutoff times, we minimize the overhead from searching for and extracting data for feature calculations.\n",
"\n",
"One way to decrease the number of unique cutoff times is to round cutoff times to an earlier point in time. An earlier cutoff time is always valid for predictive modeling — it just means we’re not using some of the data we could potentially use while calculating that feature. So, we gain computational speed by losing a small amount of information.\n",
"\n",
"To understand when an approximation is useful, consider calculating features for a model to predict fraudulent credit card transactions. In this case, an important feature might be, \"the average transaction amount for this card in the past\". While this value can change every time there is a new transaction, updating it less frequently might not impact accuracy."
]
},
{
"cell_type": "raw",
"id": "3628cc1c",
"metadata": {
"raw_mimetype": "text/restructuredtext"
},
"source": [
".. note::\n",
"\n",
" The bank BBVA used approximation when building a predictive model for credit card fraud using Featuretools. For more details, see the \"Real-time deployment considerations\" section of the `white paper <https://arxiv.org/abs/1710.07709>`_ describing the work involved.\n"
]
},
{
"cell_type": "raw",
"id": "4bf10090",
"metadata": {
"raw_mimetype": "text/restructuredtext"
},
"source": [
"The frequency of approximation is controlled using the ``approximate`` parameter to :func:`featuretools.dfs` or :func:`featuretools.calculate_feature_matrix`. For example, the following code would approximate aggregation features at 1 day intervals::"
]
},
{
"cell_type": "markdown",
"id": "641981d0",
"metadata": {},
"source": [
" fm = ft.calculate_feature_matrix(features=features,\n",
" entityset=es_transactions,\n",
" cutoff_time=ct_transactions,\n",
" approximate=\"1 day\")\n",
"\n",
"In this computation, features that can be approximated will be calculated at 1 day intervals, while features that cannot be approximated (e.g \"where did this transaction occur?\") will be calculated at the exact cutoff time.\n",
"\n",
"\n",
"## Secondary Time Index\n",
"\n",
"It is sometimes the case that information in a dataset is updated or added after a row has been created. This means that certain columns may actually become known after the time index for a row. Rather than drop those columns to avoid leaking information, we can create a secondary time index to indicate when those columns become known."
]
},
{
"cell_type": "raw",
"id": "6f8197f9",
"metadata": {
"raw_mimetype": "text/restructuredtext"
},
"source": [
"The :func:`Flights <featuretools.demo.load_flight>` entityset is a good example of a dataset where column values in a row become known at different times. Each trip is recorded in the ``trip_logs`` dataframe, and has many times associated with it."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d6043477",
"metadata": {
"nbsphinx": "hidden"
},
"outputs": [],
"source": [
"import urllib.request as urllib2\n",
"\n",
"opener = urllib2.build_opener()\n",
"opener.addheaders = [(\"Testing\", \"True\")]\n",
"urllib2.install_opener(opener)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "abf92463",
"metadata": {},
"outputs": [],
"source": [
"es_flight = ft.demo.load_flight(nrows=100)\n",
"es_flight\n",
"es_flight[\"trip_logs\"].head(3)"
]
},
{
"cell_type": "markdown",
"id": "36827ff9",
"metadata": {},
"source": [
"For every trip log, the time index is ``date_scheduled``, which is when the airline decided on the scheduled departure and arrival times, as well as what route will be flown. We don't know the rest of the information about the actual departure/arrival times and the details of any delay at this time. However, it is possible to know everything about how a trip went after it has arrived, so we can use that information at any time after the flight lands.\n",
"\n",
"Using a secondary time index, we can indicate to Featuretools which columns in our flight logs are known at the time the flight is scheduled, plus which are known at the time the flight lands.\n",
"\n",
"<img src=\"../_static/images/flight_ti_2.png\" width=\"400\" align=\"center\" alt=\"flight secondary time index diagram\">\n",
"\n",
"In Featuretools, when adding the dataframe to the ``EntitySet``, we set the secondary time index to be the arrival time like this:\n",
"\n",
" es = ft.EntitySet('Flight Data')\n",
" arr_time_columns = ['arr_delay', 'dep_delay', 'carrier_delay', 'weather_delay',\n",
" 'national_airspace_delay', 'security_delay',\n",
" 'late_aircraft_delay', 'canceled', 'diverted',\n",
" 'taxi_in', 'taxi_out', 'air_time', 'dep_time']\n",
"\n",
" es.add_dataframe(\n",
" dataframe_name='trip_logs',\n",
" dataframe=data,\n",
" index='trip_log_id',\n",
" make_index=True,\n",
" time_index='date_scheduled',\n",
" secondary_time_index={'arr_time': arr_time_columns})\n",
"\n",
"By setting a secondary time index, we can still use the delay information from a row, but only when it becomes known."
]
},
{
"cell_type": "raw",
"id": "eaef7ec8",
"metadata": {
"raw_mimetype": "text/restructuredtext"
},
"source": [
".. hint::\n",
"\n",
" It's often a good idea to use a secondary time index if your entityset has inline labels. If you know when the label would be valid for use, it's possible to automatically create very predictive features using historical labels."
]
},
{
"cell_type": "markdown",
"id": "03448def",
"metadata": {},
"source": [
"## Flight Predictions\n",
"\n",
"Let's make some features at varying times using the flight example described above. Trip ``14`` is a flight from CLT to PHX on January 31, 2017 and trip ``92`` is a flight from PIT to DFW on January 1. We can set any cutoff time before the flight is scheduled to depart, emulating how we would make the prediction at that point in time.\n",
"\n",
"We set two cutoff times for trip ``14`` at two different times: one which is more than a month before the flight and another which is only 5 days before. For trip ``92``, we'll only set one cutoff time, three days before it is scheduled to leave.\n",
"\n",
"<img src=\"../_static/images/flight_ct.png\" width=\"500\" align=\"center\" alt=\"flight cutoff time diagram\">\n",
"\n",
"Our cutoff time dataframe looks like this:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c338105b",
"metadata": {},
"outputs": [],
"source": [
"ct_flight = pd.DataFrame()\n",
"ct_flight[\"trip_log_id\"] = [14, 14, 92]\n",
"ct_flight[\"time\"] = pd.to_datetime([\"2016-12-28\", \"2017-1-25\", \"2016-12-28\"])\n",
"ct_flight[\"label\"] = [True, True, False]\n",
"ct_flight"
]
},
{
"cell_type": "markdown",
"id": "f26db5dd",
"metadata": {},
"source": [
"Now, let's calculate the feature matrix:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bd56c24e",
"metadata": {},
"outputs": [],
"source": [
"fm, features = ft.dfs(\n",
" entityset=es_flight,\n",
" target_dataframe_name=\"trip_logs\",\n",
" cutoff_time=ct_flight,\n",
" cutoff_time_in_index=True,\n",
" agg_primitives=[\"max\"],\n",
" trans_primitives=[\"month\"],\n",
")\n",
"fm[\n",
" [\n",
" \"flights.origin\",\n",
" \"flights.dest\",\n",
" \"label\",\n",
" \"flights.MAX(trip_logs.arr_delay)\",\n",
" \"MONTH(scheduled_dep_time)\",\n",
" ]\n",
"]"
]
},
{
"cell_type": "markdown",
"id": "f367279c",
"metadata": {},
"source": [
"Let's understand the output:\n",
"\n",
"1. A row was made for every id-time pair in ``ct_flight``, which is returned as the index of the feature matrix.\n",
"\n",
"2. The output was sorted by cutoff time. Because of the sorting, it's often helpful to pass in a label with the cutoff time dataframe so that it will remain sorted in the same fashion as the feature matrix. Any additional columns beyond ``id`` and ``cutoff_time`` will not be used for making features.\n",
"\n",
"3. The column ``flights.MAX(trip_logs.arr_delay)`` is not always defined. It can only have any real values when there are historical flights to aggregate. Notice that, for trip ``14``, there wasn't any historical data when we made the feature a month in advance, but there **were** flights to aggregate when we shortened it to 5 days. These are powerful features that are often excluded in manual processes because of how hard they are to make.\n",
"\n",
"\n",
"Creating and Flattening a Feature Tensor\n",
"----------------------------------------"
]
},
{
"cell_type": "raw",
"id": "3d5f23cc",
"metadata": {
"raw_mimetype": "text/restructuredtext"
},
"source": [
"The :func:`featuretools.make_temporal_cutoffs` function generates a series of equally spaced cutoff times from a given set of cutoff times and instance ids."
]
},
{
"cell_type": "markdown",
"id": "a7b677e7",
"metadata": {},
"source": [
"This function can be paired with DFS to create and flatten a feature tensor rather than making multiple feature matrices at different delays.\n",
"\n",
"The function\n",
"takes in the the following parameters:\n",
"\n",
" * ``instance_ids (list, pd.Series, or np.ndarray)``: A list of instances.\n",
" * ``cutoffs (list, pd.Series, or np.ndarray)``: An associated list of cutoff times.\n",
" * ``window_size (str or pandas.DateOffset)``: The amount of time between each cutoff time in the created time series.\n",
" * ``start (datetime.datetime or pd.Timestamp)``: The first cutoff time in the created time series.\n",
" * ``num_windows (int)``: The number of cutoff times to create in the created time series.\n",
"\n",
"Only two of the three options ``window_size``, ``start``, and ``num_windows`` need to be specified to uniquely determine an equally-spaced set of cutoff times at which to compute each instance.\n",
"\n",
"If your cutoff times are the ones used above:\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e7648a9d",
"metadata": {},
"outputs": [],
"source": [
"cutoff_times"
]
},
{
"cell_type": "markdown",
"id": "9bda6ff4",
"metadata": {},
"source": [
"Then passing in ``window_size='1h'`` and ``num_windows=2`` makes one row an hour over the last two hours to produce the following new dataframe. The result can be directly passed into DFS to make features at the different time points."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b4204f47",
"metadata": {},
"outputs": [],
"source": [
"temporal_cutoffs = ft.make_temporal_cutoffs(\n",
" cutoff_times[\"customer_id\"], cutoff_times[\"time\"], window_size=\"1h\", num_windows=2\n",
")\n",
"temporal_cutoffs\n",
"fm, features = ft.dfs(\n",
" entityset=es,\n",
" target_dataframe_name=\"customers\",\n",
" cutoff_time=temporal_cutoffs,\n",
" cutoff_time_in_index=True,\n",
")\n",
"fm"
]
}
],
"metadata": {
"celltoolbar": "Raw Cell Format",
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
================================================
FILE: docs/source/getting_started/primitives.ipynb
================================================
{
"cells": [
{
"cell_type": "raw",
"metadata": {
"raw_mimetype": "text/restructuredtext"
},
"source": [
".. _primitives:"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Feature primitives\n",
"Feature primitives are the building blocks of Featuretools. They define individual computations that can be applied to raw datasets to create new features. Because a primitive only constrains the input and output data types, they can be applied across datasets and can stack to create new calculations.\n",
"\n",
"## Why primitives?\n",
"The space of potential functions that humans use to create a feature is expansive. By breaking common feature engineering calculations down into primitive components, we are able to capture the underlying structure of the features humans create today.\n",
"\n",
"A primitive only constrains the input and output data types. This means they can be used to transfer calculations known in one domain to another. Consider a feature which is often calculated by data scientists for transactional or event logs data: *average time between events*. This feature is incredibly valuable in predicting fraudulent behavior or future customer engagement.\n",
"\n",
"DFS achieves the same feature by stacking two primitives `\"time_since_previous\"` and `\"mean\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import featuretools as ft\n",
"\n",
"es = ft.demo.load_mock_customer(return_entityset=True)\n",
"\n",
"feature_defs = ft.dfs(\n",
" entityset=es,\n",
" target_dataframe_name=\"customers\",\n",
" agg_primitives=[\"mean\"],\n",
" trans_primitives=[\"time_since_previous\"],\n",
" features_only=True,\n",
")\n",
"\n",
"feature_defs"
]
},
{
"cell_type": "raw",
"metadata": {
"raw_mimetype": "text/restructuredtext"
},
"source": [
".. note:: \n",
"\n",
" The primitive arguments to DFS (eg. ``agg_primitives`` and ``trans_primitives`` in the example above) accept ``snake_case``, ``camelCase``, or ``TitleCase`` strings of included Featuretools primitives (ie. ``time_since_previous``, ``timeSincePrevious``, and ``TimeSincePrevious`` are all acceptable inputs).\n",
"\n",
".. note::\n",
"\n",
" When ``dfs`` is called with ``features_only=True``, only feature definitions are returned as output. By default this parameter is set to ``False``. This parameter is used quickly inspect the feature definitions before the spending time calculating the feature matrix."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"A second advantage of primitives is that they can be used to quickly enumerate many interesting features in a parameterized way. This is used by Deep Feature Synthesis to get several different ways of summarizing the time since the previous event."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"feature_matrix, feature_defs = ft.dfs(\n",
" entityset=es,\n",
" target_dataframe_name=\"customers\",\n",
" agg_primitives=[\"mean\", \"max\", \"min\", \"std\", \"skew\"],\n",
" trans_primitives=[\"time_since_previous\"],\n",
")\n",
"\n",
"feature_matrix[\n",
" [\n",
" \"MEAN(sessions.TIME_SINCE_PREVIOUS(session_start))\",\n",
" \"MAX(sessions.TIME_SINCE_PREVIOUS(session_start))\",\n",
" \"MIN(sessions.TIME_SINCE_PREVIOUS(session_start))\",\n",
" \"STD(sessions.TIME_SINCE_PREVIOUS(session_start))\",\n",
" \"SKEW(sessions.TIME_SINCE_PREVIOUS(session_start))\",\n",
" ]\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Aggregation vs Transform Primitive\n",
"\n",
"In the example above, we use two types of primitives.\n",
"\n",
"**Aggregation primitives:** These primitives take related instances as an input and output a single value. They are applied across a parent-child relationship in an EntitySet. E.g: `\"count\"`, `\"sum\"`, `\"avg_time_between\"`."
]
},
{
"cell_type": "raw",
"metadata": {
"raw_mimetype": "text/restructuredtext"
},
"source": [
".. graphviz:: graphs/agg_feat.dot"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Transform primitives:** These primitives take one or more columns from a dataframe as an input and output a new column for that dataframe. They are applied to a single dataframe. E.g: `\"hour\"`, `\"time_since_previous\"`, `\"absolute\"`."
]
},
{
"cell_type": "raw",
"metadata": {
"raw_mimetype": "text/restructuredtext"
},
"source": [
".. graphviz:: graphs/trans_feat.dot\n",
"\n",
"\n",
"The above graphs were generated using the :func:`graph_feature <featuretools.graph_feature>` function. These feature lineage graphs help to visually show how primitives were stacked to generate a feature."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For a DataFrame that lists and describes each built-in primitive in Featuretools, call `ft.list_primitives()`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ft.list_primitives().head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For a DataFrame of metrics that summarizes various properties and capabilities of all of the built-in primitives in Featuretools, call `ft.summarize_primitives()`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ft.summarize_primitives()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Defining Custom Primitives\n",
"\n",
"The library of primitives in Featuretools is constantly expanding. Users can define their own primitive using the APIs below. To define a primitive, a user will\n",
"\n",
"\n",
" * Specify the type of primitive `Aggregation` or `Transform`\n",
" * Define the input and output data types\n",
" * Write a function in python to do the calculation\n",
" * Annotate with attributes to constrain how it is applied\n",
"\n",
"\n",
"Once a primitive is defined, it can stack with existing primitives to generate complex patterns. This enables primitives known to be important for one domain to automatically be transfered to another."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from woodwork.column_schema import ColumnSchema\n",
"from woodwork.logical_types import Datetime, NaturalLanguage\n",
"\n",
"from featuretools.primitives import AggregationPrimitive, TransformPrimitive\n",
"from featuretools.tests.testing_utils import make_ecommerce_entityset"
]
},
{
"cell_type": "markdown",
"metadata": {
"raw_mimetype": "text/markdown"
},
"source": [
"### Simple Custom Primitives"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class Absolute(TransformPrimitive):\n",
" name = \"absolute\"\n",
" input_types = [ColumnSchema(semantic_tags={\"numeric\"})]\n",
" return_type = ColumnSchema(semantic_tags={\"numeric\"})\n",
"\n",
" def get_function(self):\n",
" def absolute(column):\n",
" return abs(column)\n",
"\n",
" return absolute"
]
},
{
"cell_type": "markdown",
"metadata": {
"raw_mimetype": "text/markdown"
},
"source": [
"Above, we created a new transform primitive that can be used with Deep Feature Synthesis by deriving a new primitive class using `TransformPrimitive` as a base and overriding `get_function` to return a function that calculates the feature. Additionally, we set the input data types that the primitive applies to and the return data type. Input and return data types are defined using a Woodwork ColumnSchema. A full guide on Woodwork logical types and semantic tags can be found in the Woodwork [Understanding Logical Types and Semantic Tags](https://woodwork.alteryx.com/en/stable/guides/logical_types_and_semantic_tags.html) guide.\n",
"\n",
"Similarly, we can make a new aggregation primitive using `AggregationPrimitive`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class Maximum(AggregationPrimitive):\n",
" name = \"maximum\"\n",
" input_types = [ColumnSchema(semantic_tags={\"numeric\"})]\n",
" return_type = ColumnSchema(semantic_tags={\"numeric\"})\n",
"\n",
" def get_function(self):\n",
" def maximum(column):\n",
" return max(column)\n",
"\n",
" return maximum"
]
},
{
"cell_type": "markdown",
"metadata": {
"raw_mimetype": "text/markdown"
},
"source": [
"Because we defined an aggregation primitive, the function takes in a list of values but only returns one.\n",
"\n",
"Now that we've defined two primitives, we can use them with the dfs function as if they were built-in primitives."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"feature_matrix, feature_defs = ft.dfs(\n",
" entityset=es,\n",
" target_dataframe_name=\"sessions\",\n",
" agg_primitives=[Maximum],\n",
" trans_primitives=[Absolute],\n",
" max_depth=2,\n",
")\n",
"\n",
"feature_matrix.head(5)[\n",
" [\n",
" \"customers.MAXIMUM(transactions.amount)\",\n",
" \"MAXIMUM(transactions.ABSOLUTE(amount))\",\n",
" ]\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {
"raw_mimetype": "text/restructuredtext"
},
"source": [
"### Word Count Example\n",
"\n",
"Here we define a transform primitive, `WordCount`, which counts the number of words in each row of an input and returns a list of the counts."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class WordCount(TransformPrimitive):\n",
" \"\"\"\n",
" Counts the number of words in each row of the column. Returns a list\n",
" of the counts for each row.\n",
" \"\"\"\n",
"\n",
" name = \"word_count\"\n",
" input_types = [ColumnSchema(logical_type=NaturalLanguage)]\n",
" return_type = ColumnSchema(semantic_tags={\"numeric\"})\n",
"\n",
" def get_function(self):\n",
" def word_count(column):\n",
" word_counts = []\n",
" for value in column:\n",
" words = value.split(None)\n",
" word_counts.append(len(words))\n",
" return word_counts\n",
"\n",
" return word_count"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"es = make_ecommerce_entityset()\n",
"\n",
"feature_matrix, features = ft.dfs(\n",
" entityset=es,\n",
" target_dataframe_name=\"sessions\",\n",
" agg_primitives=[\"sum\", \"mean\", \"std\"],\n",
" trans_primitives=[WordCount],\n",
")\n",
"\n",
"feature_matrix[\n",
" [\n",
" \"customers.WORD_COUNT(favorite_quote)\",\n",
" \"STD(log.WORD_COUNT(comments))\",\n",
" \"SUM(log.WORD_COUNT(comments))\",\n",
" \"MEAN(log.WORD_COUNT(comments))\",\n",
" ]\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {
"raw_mimetype": "text/markdown"
},
"source": [
"By adding some aggregation primitives as well, Deep Feature Synthesis was able to make four new features from one new primitive.\n",
"\n",
"### Multiple Input Types\n",
"\n",
"If a primitive requires multiple features as input, `input_types` has multiple elements, eg `[ColumnSchema(semantic_tags={'numeric'}), ColumnSchema(semantic_tags={'numeric'})]` would mean the primitive requires two columns with the semantic tag `numeric` as input. Below is an example of a primitive that has multiple input features."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class MeanSunday(AggregationPrimitive):\n",
" \"\"\"\n",
" Finds the mean of non-null values of a feature that occurred on Sundays\n",
" \"\"\"\n",
"\n",
" name = \"mean_sunday\"\n",
" input_types = [\n",
" ColumnSchema(semantic_tags={\"numeric\"}),\n",
" ColumnSchema(logical_type=Datetime),\n",
" ]\n",
" return_type = ColumnSchema(semantic_tags={\"numeric\"})\n",
"\n",
" def get_function(self):\n",
" def mean_sunday(numeric, datetime):\n",
" days = pd.DatetimeIndex(datetime).weekday.values\n",
" df = pd.DataFrame({\"numeric\": numeric, \"time\": days})\n",
" return df[df[\"time\"] == 6][\"numeric\"].mean()\n",
"\n",
" return mean_sunday"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"feature_matrix, features = ft.dfs(\n",
" entityset=es,\n",
" target_dataframe_name=\"sessions\",\n",
" agg_primitives=[MeanSunday],\n",
" trans_primitives=[],\n",
" max_depth=1,\n",
")\n",
"\n",
"feature_matrix[\n",
" [\n",
" \"MEAN_SUNDAY(log.value, datetime)\",\n",
" \"MEAN_SUNDAY(log.value_2, datetime)\",\n",
" ]\n",
"]"
]
}
],
"metadata": {
"celltoolbar": "Raw Cell Format",
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
================================================
FILE: docs/source/getting_started/using_entitysets.ipynb
================================================
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Representing Data with EntitySets\n",
"\n",
"An ``EntitySet`` is a collection of dataframes and the relationships between them. They are useful for preparing raw, structured datasets for feature engineering. While many functions in Featuretools take ``dataframes`` and ``relationships`` as separate arguments, it is recommended to create an ``EntitySet``, so you can more easily manipulate your data as needed.\n",
"\n",
"## The Raw Data\n",
"\n",
"Below we have two tables of data (represented as Pandas DataFrames) related to customer transactions. The first is a merge of transactions, sessions, and customers so that the result looks like something you might see in a log file:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import featuretools as ft\n",
"\n",
"data = ft.demo.load_mock_customer()\n",
"transactions_df = data[\"transactions\"].merge(data[\"sessions\"]).merge(data[\"customers\"])\n",
"\n",
"transactions_df.sample(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"And the second dataframe is a list of products involved in those transactions."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"products_df = data[\"products\"]\n",
"products_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Creating an EntitySet\n",
"\n",
"First, we initialize an ``EntitySet``. If you'd like to give it a name, you can optionally provide an ``id`` to the constructor."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"es = ft.EntitySet(id=\"customer_data\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Adding dataframes\n",
"\n",
"To get started, we add the transactions dataframe to the `EntitySet`. In the call to ``add_dataframe``, we specify three important parameters:\n",
"\n",
"* The ``index`` parameter specifies the column that uniquely identifies rows in the dataframe.\n",
"* The ``time_index`` parameter tells Featuretools when the data was created.\n",
"* The ``logical_types`` parameter indicates that \"product_id\" should be interpreted as a Categorical column, even though it is just an integer in the underlying data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from woodwork.logical_types import Categorical, PostalCode\n",
"\n",
"es = es.add_dataframe(\n",
" dataframe_name=\"transactions\",\n",
" dataframe=transactions_df,\n",
" index=\"transaction_id\",\n",
" time_index=\"transaction_time\",\n",
" logical_types={\n",
" \"product_id\": Categorical,\n",
" \"zip_code\": PostalCode,\n",
" },\n",
")\n",
"\n",
"es"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can also use a setter on the ``EntitySet`` object to add dataframes"
]
},
{
"cell_type": "raw",
"metadata": {
"raw_mimetype": "text/restructuredtext"
},
"source": [
".. currentmodule:: featuretools\n",
"\n",
"\n",
".. note ::\n",
"\n",
" You can also use a setter on the ``EntitySet`` object to add dataframes\n",
"\n",
" ``es[\"transactions\"] = transactions_df``\n",
"\n",
" that this will use the default implementation of `add_dataframe`, notably the following:\n",
"\n",
" * if the DataFrame does not have `Woodwork <https://woodwork.alteryx.com/>`_ initialized, the first column will be the index column\n",
" * if the DataFrame does not have Woodwork initialized, all columns will be inferred by Woodwork.\n",
" * if control over the time index column and logical types is needed, Woodwork should be initialized before adding the dataframe.\n",
"\n",
".. note ::\n",
"\n",
" You can also display your `EntitySet` structure graphically by calling :meth:`.EntitySet.plot`."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This method associates each column in the dataframe to a [Woodwork](https://woodwork.alteryx.com/) logical type. Each logical type can have an associated standard semantic tag that helps define the column data type. If you don't specify the logical type for a column, it gets inferred based on the underlying data. The logical types and semantic tags are listed in the schema of the dataframe. For more information on working with logical types and semantic tags, take a look at the [Woodwork documention](https://woodwork.alteryx.com/)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"es[\"transactions\"].ww.schema"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now, we can do that same thing with our products dataframe."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"es = es.add_dataframe(\n",
" dataframe_name=\"products\", dataframe=products_df, index=\"product_id\"\n",
")\n",
"\n",
"es"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"With two dataframes in our `EntitySet`, we can add a relationship between them.\n",
"\n",
"## Adding a Relationship\n",
"\n",
"We want to relate these two dataframes by the columns called \"product_id\" in each dataframe. Each product has multiple transactions associated with it, so it is called the **parent dataframe**, while the transactions dataframe is known as the **child dataframe**. When specifying relationships, we need four parameters: the parent dataframe name, the parent column name, the child dataframe name, and the child column name. Note that each relationship must denote a one-to-many relationship rather than a relationship which is one-to-one or many-to-many."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"es = es.add_relationship(\"products\", \"product_id\", \"transactions\", \"product_id\")\n",
"es"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now, we see the relationship has been added to our `EntitySet`.\n",
"\n",
"## Creating a dataframe from an existing table\n",
"\n",
"When working with raw data, it is common to have sufficient information to justify the creation of new dataframes. In order to create a new dataframe and relationship for sessions, we \"normalize\" the transaction dataframe."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"es = es.normalize_dataframe(\n",
" base_dataframe_name=\"transactions\",\n",
" new_dataframe_name=\"sessions\",\n",
" index=\"session_id\",\n",
" make_time_index=\"session_start\",\n",
" additional_columns=[\n",
" \"device\",\n",
" \"customer_id\",\n",
" \"zip_code\",\n",
" \"session_start\",\n",
" \"join_date\",\n",
" ],\n",
")\n",
"es"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Looking at the output above, we see this method did two operations:\n",
"\n",
"1. It created a new dataframe called \"sessions\" based on the \"session_id\" and \"session_start\" columns in \"transactions\"\n",
"2. It added a relationship connecting \"transactions\" and \"sessions\"\n",
"\n",
"If we look at the schema from the transactions dataframe and the new sessions dataframe, we see two more operations that were performed automatically:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"es[\"transactions\"].ww.schema"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"es[\"sessions\"].ww.schema"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1. It removed \"device\", \"customer_id\", \"zip_code\" and \"join_date\" from \"transactions\" and created a new columns in the sessions dataframe. This reduces redundant information as the those properties of a session don't change between transactions.\n",
"2. It copied and marked \"session_start\" as a time index column into the new sessions dataframe to indicate the beginning of a session. If the base dataframe has a time index and ``make_time_index`` is not set, ``normalize_dataframe`` will create a time index for the new dataframe. In this case it would create a new time index called \"first_transactions_time\" using the time of the first transaction of each session. If we don't want this time index to be created, we can set ``make_time_index=False``.\n",
"\n",
"If we look at the dataframes, we can see what ``normalize_dataframe`` did to the actual data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"es[\"sessions\"].head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"es[\"transactions\"].head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To finish preparing this dataset, create a \"customers\" dataframe using the same method call."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"es = es.normalize_dataframe(\n",
" base_dataframe_name=\"sessions\",\n",
" new_dataframe_name=\"customers\",\n",
" index=\"customer_id\",\n",
" make_time_index=\"join_date\",\n",
" additional_columns=[\"zip_code\", \"join_date\"],\n",
")\n",
"\n",
"es"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Using the EntitySet\n",
"\n",
"Finally, we are ready to use this EntitySet with any functionality within Featuretools. For example, let's build a feature matrix for each product in our dataset."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"feature_matrix, feature_defs = ft.dfs(entityset=es, target_dataframe_name=\"products\")\n",
"\n",
"feature_matrix"
]
},
{
"cell_type": "raw",
"metadata": {
"raw_mimetype": "text/restructuredtext",
"vscode": {
"languageId": "raw"
}
},
"source": [
"As we can see, the features from DFS use the relational structure of our `EntitySet`. Therefore it is important to think carefully about the dataframes that we create."
]
}
],
"metadata": {
"celltoolbar": "Raw Cell Format",
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
================================================
FILE: docs/source/getting_started/woodwork_types.ipynb
================================================
{
"cells": [
{
"cell_type": "markdown",
"id": "b95b28c1",
"metadata": {},
"source": [
"# Woodwork Typing in Featuretools\n",
"\n",
"Featuretools relies on having consistent typing across the creation of EntitySets, Primitives, Features, and feature matrices. Previously, Featuretools used its own type system that contained objects called Variables. Now and moving forward, Featuretools will use an external data typing library for its typing: [Woodwork](https://woodwork.alteryx.com/en/stable/index.html).\n",
"\n",
"Understanding the Woodwork types that exist and how Featuretools uses Woodwork's type system will allow users to:\n",
" - build EntitySets that best represent their data\n",
" - understand the possible input and return types for Featuretools' Primitives\n",
" - understand what features will get generated from a given set of data and primitives.\n",
"\n",
"Read the [Understanding Woodwork Logical Types and Semantic Tags](https://woodwork.alteryx.com/en/stable/guides/logical_types_and_semantic_tags.html) guide for an in-depth walkthrough of the available Woodwork types that are outlined below.\n",
"\n",
"For users that are familiar with the old `Variable` objects, the [Transitioning to Featuretools Version 1.0](../resources/transition_to_ft_v1.0.ipynb) guide will be useful for converting Variable types to Woodwork types.\n",
"\n",
"## Physical Types \n",
"Physical types define how the data in a Woodwork DataFrame is stored on disk or in memory. You might also see the physical type for a column referred to as the column’s `dtype`.\n",
"\n",
"Knowing a Woodwork DataFrame's physical types is important because Pandas relies on these types when performing DataFrame operations. Each Woodwork `LogicalType` class has a single physical type associated with it.\n",
"\n",
"## Logical Types\n",
"Logical types add additional information about how data should be interpreted or parsed beyond what can be contained in a physical type. In fact, multiple logical types have the same physical type, each imparting a different meaning that's not contained in the physical type alone.\n",
"\n",
"In Featuretools, a column's logical type informs how data is read into an EntitySet and how it gets used down the line in Deep Feature Synthesis.\n",
"\n",
"Woodwork provides many different logical types, which can be seen with the `list_logical_types` function."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "497712b0",
"metadata": {},
"outputs": [],
"source": [
"import featuretools as ft\n",
"\n",
"ft.list_logical_types()"
]
},
{
"cell_type": "markdown",
"id": "cfe99d0f",
"metadata": {},
"source": [
"Featuretools will perform type inference to assign logical types to the data in EntitySets if none are provided, but it is also possible to specify which logical types should be set for any column (provided that the data in that column is compatible with the logical type).\n",
"\n",
"To learn more about how logical types are used in EntitySets, see the [Creating EntitySets](using_entitysets.ipynb) guide.\n",
"\n",
"To learn more about setting logical types directly on a DataFrame, see the Woodwork guide on [working with Logical Types](https://woodwork.alteryx.com/en/stable/guides/working_with_types_and_tags.html#Working-with-Logical-Types). \n",
"\n",
"## Semantic Tags\n",
"Semantic tags provide additional information to columns about the meaning or potential uses of data. Columns can have many or no semantic tags. Some tags are added by Woodwork, some are added by Featuretools, and users can add additional tags as they see fit.\n",
"\n",
"To learn more about setting semantic tags directly on a DataFrame, see the Woodwork guide on [working with Semantic Tags](https://woodwork.alteryx.com/en/stable/guides/working_with_types_and_tags.html#Working-with-Semantic-Tags). \n",
"\n",
"### Woodwork-defined Semantic Tags\n",
"\n",
"Woodwork will add certain semantic tags to columns at initialization. These can be standard tags that may be associated with different sets of logical types or index tags. There are also tags that users can add to confer a suggested meaning to columns in Woodwork.\n",
"\n",
"To get a list of these tags, you can use the `list_semantic_tags` function."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "11f25bd9",
"metadata": {},
"outputs": [],
"source": [
"ft.list_semantic_tags()"
]
},
{
"cell_type": "markdown",
"id": "29222810",
"metadata": {},
"source": [
"Above we see the semantic tags that are defined within Woodwork. These tags inform how Featuretools is able to interpret data, an example of which can be seen in the `Age` primitive, which requires that the `date_of_birth` semantic tag be present on a column.\n",
"\n",
"The `date_of_birth` tag will not get automatically added by Woodwork, so in order for Featuretools to be able to use the `Age` primitive, the `date_of_birth` tag must be manually added to any columns to which it applies.\n",
"\n",
"### Featuretools-defined Semantic Tags\n",
"\n",
"Just like Woodwork specifies semantic tags internally, Featuretools also defines a few tags of its own that allow the full set of Features to be generated. These tags have specific meanings when they are present on a column.\n",
"\n",
"- `'last_time_index'` - added by Featuretools to the last time index column of a DataFrame. Indicates that this column has been created by Featuretools.\n",
"- `'foreign_key'` - used to indicate that this column is the child column of a relationship, meaning that this column is related to a corresponding index column of another dataframe in the EntitySet.\n",
"\n",
"\n",
"## Woodwork Throughout Featuretools\n",
"\n",
"Now that we've described the elements that make up Woodwork's type system, lets see them in action in Featuretools.\n",
"\n",
"### Woodwork in EntitySets\n",
"For more information on building EntitySets using Woodwork, see the [EntitySet guide](using_entitysets.ipynb).\n",
"\n",
"Let's look at the Woodwork typing information as it's stored in a demo EntitySet of retail data:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bd9c1ec9",
"metadata": {},
"outputs": [],
"source": [
"es = ft.demo.load_retail()\n",
"es"
]
},
{
"cell_type": "markdown",
"id": "267880c4",
"metadata": {},
"source": [
"Woodwork typing information is not stored in the EntitySet object, but rather is stored in the individual DataFrames that make up the EntitySet. To look at the Woodwork typing information, we first select a single DataFrame from the EntitySet, and then access the Woodwork information via the `ww` namespace:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa1966fd",
"metadata": {},
"outputs": [],
"source": [
"df = es[\"products\"]\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "164b1138",
"metadata": {},
"outputs": [],
"source": [
"df.ww"
]
},
{
"cell_type": "markdown",
"id": "4bffac54",
"metadata": {},
"source": [
"Notice how the three columns showing this DataFrame's typing information are the three elements of typing information outlined at the beginning of this guide. To reiterate: By defining physical types, logical types, and semantic tags for each column in a DataFrame, we've defined a DataFrame's Woodwork schema, and with it, we can gain an understanding of the contents of each column.\n",
"\n",
"This column-specific typing information that exists for every column in every DataFrame in an EntitySet is an integral part of Deep Feature Synthesis' ability to generate features for an EntitySet.\n",
"\n",
"### Woodwork in DFS\n",
"As the units of computation in Featuretools, Primitives need to be able to specify the input types that they allow as well as have a predictable return type. For an in-depth explanation of Primitives in Featuretools, see the [Feature Primitives](primitives.ipynb) guide. Here, we'll look at how the Woodwork types come together into a `ColumnSchema` object to describe Primitive input and return types.\n",
"\n",
"Below is a Woodwork `ColumnSchema` that we've obtained from the `'product_id'` column in the `products` DataFrame in the retail EntitySet."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "349e5274",
"metadata": {},
"outputs": [],
"source": [
"products_df = es[\"products\"]\n",
"product_ids_series = products_df.ww[\"product_id\"]\n",
"column_schema = product_ids_series.ww.schema\n",
"column_schema"
]
},
{
"cell_type": "markdown",
"id": "8e8c0ccf",
"metadata": {},
"source": [
"This combination of logical type and semantic tag typing information is a `ColumnSchema`. In the case above, the `ColumnSchema` describes the **type definition** for a single column of data. \n",
"\n",
"Notice that there is no physical type in a `ColumnSchema`. This is because a `ColumnSchema` is a collection of Woodwork types that doesn't have any data tied to it and therefore has no physical representation. Because a `ColumnSchema` object is not tied to any data, it can also be used to describe a **type space** into which other columns may or may not fall.\n",
"\n",
"This flexibility of the `ColumnSchema` class allows `ColumnSchema` objects to be used both as type definitions for every column in an EntitySet as well as input and return type spaces for every Primitive in Featuretools.\n",
"\n",
"Let's look at a different column in a different DataFrame to see how this works:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f3bb3ffe",
"metadata": {},
"outputs": [],
"source": [
"order_products_df = es[\"order_products\"]\n",
"order_products_df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1aae3378",
"metadata": {},
"outputs": [],
"source": [
"quantity_series = order_products_df.ww[\"quantity\"]\n",
"column_schema = quantity_series.ww.schema\n",
"column_schema"
]
},
{
"cell_type": "markdown",
"id": "f067db9a",
"metadata": {},
"source": [
"The `ColumnSchema` above has been pulled from the `'quantity'` column in the `order_products` DataFrame in the retail EntitySet. This is a **type definition**. \n",
"\n",
"If we look at the Woodwork typing information for the `order_products` DataFrame, we can see that there are several columns that will have similar `ColumnSchema` type definitions. If we wanted to describe subsets of those columns, we could define several `ColumnSchema` **type spaces**"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bc2bfae6",
"metadata": {},
"outputs": [],
"source": [
"es[\"order_products\"].ww"
]
},
{
"cell_type": "markdown",
"id": "73257dcf",
"metadata": {},
"source": [
"Below are several `ColumnSchema`s that all would include our `quantity` column, but each of them describes a different type space. These `ColumnSchema`s get more restrictive as we go down:\n",
"\n",
"##### Entire DataFrame\n",
"No restrictions have been placed; any column falls into this definition. This would include the whole DataFrame."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f6614c98",
"metadata": {},
"outputs": [],
"source": [
"from woodwork.column_schema import ColumnSchema\n",
"\n",
"ColumnSchema()"
]
},
{
"cell_type": "markdown",
"id": "299fc7d2",
"metadata": {},
"source": [
"An example of a Primitive with this `ColumnSchema` as its input type is the `IsNull` transform primitive.\n",
"\n",
"##### By Semantic Tag\n",
"Only columns with the `numeric` tag apply. This can include Double, Integer, and Age logical type columns as well. It will not include the `index` column which, despite containing integers, has had its standard tags replaced by the `'index'` tag."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "16c1a5a9",
"metadata": {},
"outputs": [],
"source": [
"ColumnSchema(semantic_tags={\"numeric\"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0932d05d",
"metadata": {},
"outputs": [],
"source": [
"df = es[\"order_products\"].ww.select(include=\"numeric\")\n",
"df.ww"
]
},
{
"cell_type": "markdown",
"id": "a5ec95c8",
"metadata": {},
"source": [
"And example of a Primitive with this `ColumnSchema` as its input type is the `Mean` aggregation primitive.\n",
"\n",
"##### By Logical Type\n",
"Only columns with logical type of `Integer` are included in this definition. Does not require the `numeric` tag, so an index column (which has its standard tags removed) would still apply."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "79bd3d4f",
"metadata": {},
"outputs": [],
"source": [
"from woodwork.logical_types import Integer\n",
"\n",
"ColumnSchema(logical_type=Integer)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e905229e",
"metadata": {},
"outputs": [],
"source": [
"df = es[\"order_products\"].ww.select(include=\"Integer\")\n",
"df.ww"
]
},
{
"cell_type": "markdown",
"id": "2f752200",
"metadata": {},
"source": [
"##### By Logical Type and Semantic Tag\n",
"The column must have logical type `Integer` and have the `numeric` semantic tag, excluding index columns."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6da51b75",
"metadata": {},
"outputs": [],
"source": [
"ColumnSchema(logical_type=Integer, semantic_tags={\"numeric\"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a96d92f6",
"metadata": {},
"outputs": [],
"source": [
"df = es[\"order_products\"].ww.select(include=\"numeric\")\n",
"df = df.ww.select(include=\"Integer\")\n",
"df.ww"
]
},
{
"cell_type": "markdown",
"id": "71e0359b",
"metadata": {},
"source": [
"In this way, a `ColumnSchema` can define a type space under which columns in a Woodwork DataFrame can fall. This is how Featuretools determines which columns in a DataFrame are valid for a Primitive in building Features during DFS.\n",
"\n",
"Each Primitive has `input_types` and a `return_type` that are described by a Woodwork `ColumnSchema`. Every DataFrame in an EntitySet has Woodwork initialized on it. This means that when an EntitySet is passed into DFS, Featuretools can select the relevant columns in the DataFrame that are valid for the Primitive's `input_types`. We then get a Feature that has a `column_schema` property that indicates what that Feature's typing definition is in a way that lets DFS stack features on top of one another.\n",
"\n",
"In this way, Featuretools is able to leverage the base unit of Woodwork typing information, the `ColumnSchema`, and use it in concert with an EntitySet of Woodwork DataFrames in order to build Features with Deep Feature Synthesis."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
================================================
FILE: docs/source/guides/advanced_custom_primitives.ipynb
================================================
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Advanced Custom Primitives Guide"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"\n",
"import numpy as np\n",
"from woodwork.column_schema import ColumnSchema\n",
"from woodwork.logical_types import Datetime, NaturalLanguage\n",
"\n",
"import featuretools as ft\n",
"from featuretools.primitives import TransformPrimitive\n",
"from featuretools.tests.testing_utils import make_ecommerce_entityset"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Primitives with Additional Arguments\n",
"\n",
"Some features require more advanced calculations than others. Advanced features usually entail additional arguments to help output the desired value. With custom primitives, you can use primitive arguments to help you create advanced features.\n",
"\n",
"### String Count Example\n",
"\n",
"In this example, you will learn how to make custom primitives that take in additional arguments. You will create a primitive to count the number of times a specific string value occurs inside a text.\n",
"\n",
"First, derive a new transform primitive class using `TransformPrimitive` as a base. The primitive will take in a text column as the input and return a numeric column as the output, so set the input type to a Woodwork `ColumnSchema` with logical type `NaturalLanguage` and the return type to a Woodwork `ColumnSchema` with the semantic tag `'numeric'`. The specific string value is the additional argument, so define it as a *keyword* argument inside `__init__`. Then, override `get_function` to return a primitive function that will calculate the feature.\n",
"\n",
"Featuretools' primitives use Woodwork's `ColumnSchema` to control the input and return types of columns for the primitive. For more information about using the Woodwork typing system in Featuretools, see the [Woodwork Typing in Featuretools](../getting_started/woodwork_types.ipynb) guide."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class StringCount(TransformPrimitive):\n",
" \"\"\"Count the number of times the string value occurs.\"\"\"\n",
"\n",
" name = \"string_count\"\n",
" input_types = [ColumnSchema(logical_type=NaturalLanguage)]\n",
" return_type = ColumnSchema(semantic_tags={\"numeric\"})\n",
"\n",
" def __init__(self, string=None):\n",
" self.string = string\n",
"\n",
" def get_function(self):\n",
" def string_count(column):\n",
" assert self.string is not None, \"string to count needs to be defined\"\n",
" # this is a naive implementation used for clarity\n",
" counts = [text.lower().count(self.string) for text in column]\n",
" return counts\n",
"\n",
" return string_count"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now you have a primitive that is reusable for different string values. For example, you can create features based on the number of times the word \"the\" appears in a text. Create an instance of the primitive where the string value is \"the\" and pass the primitive into DFS to generate the features. The feature name will automatically reflect the string value of the primitive."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"es = make_ecommerce_entityset()\n",
"\n",
"feature_matrix, features = ft.dfs(\n",
" entityset=es,\n",
" target_dataframe_name=\"sessions\",\n",
" agg_primitives=[\"sum\", \"mean\", \"std\"],\n",
" trans_primitives=[StringCount(string=\"the\")],\n",
")\n",
"\n",
"feature_matrix[\n",
" [\n",
" \"STD(log.STRING_COUNT(comments, string=the))\",\n",
" \"SUM(log.STRING_COUNT(comments, string=the))\",\n",
" \"MEAN(log.STRING_COUNT(comments, string=the))\",\n",
" ]\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Features with Multiple Outputs\n",
"\n",
"Some calculations output more than a single value. With custom primitives, you can make the most of these calculations by creating a feature for each output value.\n",
"\n",
"### Case Count Example\n",
"\n",
"In this example, you will learn how to make custom primitives that output multiple features. You will create a primitive that outputs the count of upper case and lower case letters of a text.\n",
"\n",
"First, derive a new transform primitive class using `TransformPrimitive` as a base. The primitive will take in a text column as the input and return two numeric columns as the output, so set the input type to a Woodwork `ColumnSchema` with logical type `NaturalLanguage` and the return type to a Woodwork `ColumnSchema` with semantic tag `'numeric'`. Since this primitive returns two columns, also set `number_output_features` to two. Then, override `get_function` to return a primitive function that will calculate the feature and return a list of columns."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class CaseCount(TransformPrimitive):\n",
" \"\"\"Return the count of upper case and lower case letters of a text.\"\"\"\n",
"\n",
" name = \"case_count\"\n",
" input_types = [ColumnSchema(logical_type=NaturalLanguage)]\n",
" return_type = ColumnSchema(semantic_tags={\"numeric\"})\n",
" number_output_features = 2\n",
"\n",
" def get_function(self):\n",
" def case_count(array):\n",
" # this is a naive implementation used for clarity\n",
" upper = np.array([len(re.findall(\"[A-Z]\", i)) for i in array])\n",
" lower = np.array([len(re.findall(\"[a-z]\", i)) for i in array])\n",
" return upper, lower\n",
"\n",
" return case_count"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now you have a primitive that outputs two columns. One column contains the count for the upper case letters. The other column contains the count for the lower case letters. Pass the primitive into DFS to generate features. By default, the feature name will reflect the index of the output."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"feature_matrix, features = ft.dfs(\n",
" entityset=es,\n",
" target_dataframe_name=\"sessions\",\n",
" agg_primitives=[],\n",
" trans_primitives=[CaseCount],\n",
")\n",
"\n",
"feature_matrix[\n",
" [\n",
" \"customers.CASE_COUNT(favorite_quote)[0]\",\n",
" \"customers.CASE_COUNT(favorite_quote)[1]\",\n",
" ]\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Custom Naming for Multiple Outputs\n",
"\n",
"When you create a primitive that outputs multiple features, you can also define custom naming for each of those features.\n",
"\n",
"### Hourly Sine and Cosine Example\n",
"\n",
"In this example, you will learn how to apply custom naming for multiple outputs. You will create a primitive that outputs the sine and cosine of the hour.\n",
"\n",
"First, derive a new transform primitive class using `TransformPrimitive` as a base. The primitive will take in the time index as the input and return two numeric columns as the output. Set the input type to a Woodwork `ColumnSchema` with a logical type of `Datetime` and the semantic tag `'time_index'`. Next, set the return type to a Woodwork `ColumnSchema` with semantic tag `'numeric'` and set `number_output_features` to two. Then, override `get_function` to return a primitive function that will calculate the feature and return a list of columns. Also, override `generate_names` to return a list of the feature names that you define."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class HourlySineAndCosine(TransformPrimitive):\n",
" \"\"\"Returns the sine and cosine of the hour.\"\"\"\n",
"\n",
" name = \"hourly_sine_and_cosine\"\n",
" input_types = [ColumnSchema(logical_type=Datetime, semantic_tags={\"time_index\"})]\n",
" return_type = ColumnSchema(semantic_tags={\"numeric\"})\n",
"\n",
" number_output_features = 2\n",
"\n",
" def get_function(self):\n",
" def hourly_sine_and_cosine(column):\n",
" sine = np.sin(column.dt.hour)\n",
" cosine = np.cos(column.dt.hour)\n",
" return sine, cosine\n",
"\n",
" return hourly_sine_and_cosine\n",
"\n",
" def generate_names(self, base_feature_names):\n",
" name = self.generate_name(base_feature_names)\n",
" return f\"{name}[sine]\", f\"{name}[cosine]\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
 "Now you have a primitive that outputs two columns. One column contains the sine of the hour. The other column contains the cosine of the hour. Pass the primitive into DFS to generate features. The feature names will reflect the custom naming you defined."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"feature_matrix, features = ft.dfs(\n",
" entityset=es,\n",
" target_dataframe_name=\"log\",\n",
" agg_pr
gitextract_b07mgx0i/ ├── .codecov.yml ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── blank_issue.md │ │ ├── bug_report.md │ │ ├── config.yml │ │ ├── documentation_improvement.md │ │ └── feature_request.md │ ├── auto_assign.yml │ └── workflows/ │ ├── auto_approve_dependency_PRs.yaml │ ├── broken_link_check.yaml │ ├── build_docs.yaml │ ├── create_feedstock_pr.yaml │ ├── install_test.yaml │ ├── kickoff_evalml_unit_tests.yaml │ ├── latest_dependency_checker.yaml │ ├── lint_check.yaml │ ├── minimum_dependency_checker.yaml │ ├── performance-check.yaml │ ├── pull_request_check.yaml │ ├── release.yaml │ ├── release_notes_updated.yaml │ ├── test_without_test_dependencies.yaml │ ├── tests_with_latest_deps.yaml │ ├── tests_with_minimum_deps.yaml │ └── tests_with_woodwork_main_branch.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── LICENSE ├── Makefile ├── README.md ├── contributing.md ├── docs/ │ ├── Makefile │ ├── backport_release.md │ ├── make.bat │ ├── notebook_version_standardizer.py │ ├── pull_request_template.md │ └── source/ │ ├── _static/ │ │ └── style.css │ ├── api_reference.rst │ ├── conf.py │ ├── getting_started/ │ │ ├── afe.ipynb │ │ ├── getting_started_index.rst │ │ ├── handling_time.ipynb │ │ ├── primitives.ipynb │ │ ├── using_entitysets.ipynb │ │ └── woodwork_types.ipynb │ ├── guides/ │ │ ├── advanced_custom_primitives.ipynb │ │ ├── deployment.ipynb │ │ ├── feature_descriptions.ipynb │ │ ├── feature_selection.ipynb │ │ ├── guides_index.rst │ │ ├── performance.ipynb │ │ ├── specifying_primitive_options.ipynb │ │ ├── sql_database_integration.ipynb │ │ ├── time_series.ipynb │ │ └── tuning_dfs.ipynb │ ├── index.ipynb │ ├── install.md │ ├── release_notes.rst │ ├── resources/ │ │ ├── ecosystem.rst │ │ ├── frequently_asked_questions.ipynb │ │ ├── help.rst │ │ ├── resources_index.rst │ │ ├── transition_to_ft_v1.0.ipynb │ │ └── usage_tips/ │ │ ├── glossary.rst │ │ └── limitations.rst │ ├── set-headers.py │ ├── setup.py │ └── templates/ │ └── 
layout.html ├── featuretools/ │ ├── __init__.py │ ├── __main__.py │ ├── computational_backends/ │ │ ├── __init__.py │ │ ├── api.py │ │ ├── calculate_feature_matrix.py │ │ ├── feature_set.py │ │ ├── feature_set_calculator.py │ │ └── utils.py │ ├── config_init.py │ ├── demo/ │ │ ├── __init__.py │ │ ├── api.py │ │ ├── flight.py │ │ ├── mock_customer.py │ │ ├── retail.py │ │ └── weather.py │ ├── entityset/ │ │ ├── __init__.py │ │ ├── api.py │ │ ├── deserialize.py │ │ ├── entityset.py │ │ ├── relationship.py │ │ ├── serialize.py │ │ └── timedelta.py │ ├── exceptions.py │ ├── feature_base/ │ │ ├── __init__.py │ │ ├── api.py │ │ ├── cache.py │ │ ├── feature_base.py │ │ ├── feature_descriptions.py │ │ ├── feature_visualizer.py │ │ ├── features_deserializer.py │ │ ├── features_serializer.py │ │ └── utils.py │ ├── feature_discovery/ │ │ ├── FeatureCollection.py │ │ ├── LiteFeature.py │ │ ├── __init__.py │ │ ├── convertors.py │ │ ├── feature_discovery.py │ │ ├── type_defs.py │ │ └── utils.py │ ├── primitives/ │ │ ├── __init__.py │ │ ├── base/ │ │ │ ├── __init__.py │ │ │ ├── aggregation_primitive_base.py │ │ │ ├── primitive_base.py │ │ │ └── transform_primitive_base.py │ │ ├── options_utils.py │ │ ├── standard/ │ │ │ ├── __init__.py │ │ │ ├── aggregation/ │ │ │ │ ├── __init__.py │ │ │ │ ├── all_primitive.py │ │ │ │ ├── any_primitive.py │ │ │ │ ├── average_count_per_unique.py │ │ │ │ ├── avg_time_between.py │ │ │ │ ├── count.py │ │ │ │ ├── count_above_mean.py │ │ │ │ ├── count_below_mean.py │ │ │ │ ├── count_greater_than.py │ │ │ │ ├── count_inside_nth_std.py │ │ │ │ ├── count_inside_range.py │ │ │ │ ├── count_less_than.py │ │ │ │ ├── count_outside_nth_std.py │ │ │ │ ├── count_outside_range.py │ │ │ │ ├── date_first_event.py │ │ │ │ ├── entropy.py │ │ │ │ ├── first.py │ │ │ │ ├── first_last_time_delta.py │ │ │ │ ├── has_no_duplicates.py │ │ │ │ ├── is_monotonically_decreasing.py │ │ │ │ ├── is_monotonically_increasing.py │ │ │ │ ├── is_unique.py │ │ │ │ ├── kurtosis.py │ │ │ │ 
├── last.py │ │ │ │ ├── max_consecutive_false.py │ │ │ │ ├── max_consecutive_negatives.py │ │ │ │ ├── max_consecutive_positives.py │ │ │ │ ├── max_consecutive_true.py │ │ │ │ ├── max_consecutive_zeros.py │ │ │ │ ├── max_count.py │ │ │ │ ├── max_min_delta.py │ │ │ │ ├── max_primitive.py │ │ │ │ ├── mean.py │ │ │ │ ├── median.py │ │ │ │ ├── median_count.py │ │ │ │ ├── min_count.py │ │ │ │ ├── min_primitive.py │ │ │ │ ├── mode.py │ │ │ │ ├── n_most_common.py │ │ │ │ ├── n_most_common_frequency.py │ │ │ │ ├── n_unique_days.py │ │ │ │ ├── n_unique_days_of_calendar_year.py │ │ │ │ ├── n_unique_days_of_month.py │ │ │ │ ├── n_unique_months.py │ │ │ │ ├── n_unique_weeks.py │ │ │ │ ├── num_consecutive_greater_mean.py │ │ │ │ ├── num_consecutive_less_mean.py │ │ │ │ ├── num_false_since_last_true.py │ │ │ │ ├── num_peaks.py │ │ │ │ ├── num_true.py │ │ │ │ ├── num_true_since_last_false.py │ │ │ │ ├── num_unique.py │ │ │ │ ├── num_zero_crossings.py │ │ │ │ ├── percent_true.py │ │ │ │ ├── percent_unique.py │ │ │ │ ├── skew.py │ │ │ │ ├── std.py │ │ │ │ ├── sum_primitive.py │ │ │ │ ├── time_since_first.py │ │ │ │ ├── time_since_last.py │ │ │ │ ├── time_since_last_false.py │ │ │ │ ├── time_since_last_max.py │ │ │ │ ├── time_since_last_min.py │ │ │ │ ├── time_since_last_true.py │ │ │ │ ├── trend.py │ │ │ │ └── variance.py │ │ │ └── transform/ │ │ │ ├── __init__.py │ │ │ ├── absolute_diff.py │ │ │ ├── binary/ │ │ │ │ ├── __init__.py │ │ │ │ ├── add_numeric.py │ │ │ │ ├── add_numeric_scalar.py │ │ │ │ ├── and_primitive.py │ │ │ │ ├── divide_by_feature.py │ │ │ │ ├── divide_numeric.py │ │ │ │ ├── divide_numeric_scalar.py │ │ │ │ ├── equal.py │ │ │ │ ├── equal_scalar.py │ │ │ │ ├── greater_than.py │ │ │ │ ├── greater_than_equal_to.py │ │ │ │ ├── greater_than_equal_to_scalar.py │ │ │ │ ├── greater_than_scalar.py │ │ │ │ ├── less_than.py │ │ │ │ ├── less_than_equal_to.py │ │ │ │ ├── less_than_equal_to_scalar.py │ │ │ │ ├── less_than_scalar.py │ │ │ │ ├── modulo_by_feature.py │ │ │ │ ├── 
modulo_numeric.py │ │ │ │ ├── modulo_numeric_scalar.py │ │ │ │ ├── multiply_boolean.py │ │ │ │ ├── multiply_numeric.py │ │ │ │ ├── multiply_numeric_boolean.py │ │ │ │ ├── multiply_numeric_scalar.py │ │ │ │ ├── not_equal.py │ │ │ │ ├── not_equal_scalar.py │ │ │ │ ├── or_primitive.py │ │ │ │ ├── scalar_subtract_numeric_feature.py │ │ │ │ ├── subtract_numeric.py │ │ │ │ └── subtract_numeric_scalar.py │ │ │ ├── cumulative/ │ │ │ │ ├── __init__.py │ │ │ │ ├── cum_count.py │ │ │ │ ├── cum_max.py │ │ │ │ ├── cum_mean.py │ │ │ │ ├── cum_min.py │ │ │ │ ├── cum_sum.py │ │ │ │ ├── cumulative_time_since_last_false.py │ │ │ │ └── cumulative_time_since_last_true.py │ │ │ ├── datetime/ │ │ │ │ ├── __init__.py │ │ │ │ ├── age.py │ │ │ │ ├── date_to_holiday.py │ │ │ │ ├── date_to_timezone.py │ │ │ │ ├── day.py │ │ │ │ ├── day_of_year.py │ │ │ │ ├── days_in_month.py │ │ │ │ ├── diff_datetime.py │ │ │ │ ├── distance_to_holiday.py │ │ │ │ ├── hour.py │ │ │ │ ├── is_federal_holiday.py │ │ │ │ ├── is_first_week_of_month.py │ │ │ │ ├── is_leap_year.py │ │ │ │ ├── is_lunch_time.py │ │ │ │ ├── is_month_end.py │ │ │ │ ├── is_month_start.py │ │ │ │ ├── is_quarter_end.py │ │ │ │ ├── is_quarter_start.py │ │ │ │ ├── is_weekend.py │ │ │ │ ├── is_working_hours.py │ │ │ │ ├── is_year_end.py │ │ │ │ ├── is_year_start.py │ │ │ │ ├── minute.py │ │ │ │ ├── month.py │ │ │ │ ├── part_of_day.py │ │ │ │ ├── quarter.py │ │ │ │ ├── season.py │ │ │ │ ├── second.py │ │ │ │ ├── time_since.py │ │ │ │ ├── time_since_previous.py │ │ │ │ ├── utils.py │ │ │ │ ├── week.py │ │ │ │ ├── weekday.py │ │ │ │ └── year.py │ │ │ ├── email/ │ │ │ │ ├── __init__.py │ │ │ │ ├── email_address_to_domain.py │ │ │ │ └── is_free_email_domain.py │ │ │ ├── exponential/ │ │ │ │ ├── __init__.py │ │ │ │ ├── exponential_weighted_average.py │ │ │ │ ├── exponential_weighted_std.py │ │ │ │ └── exponential_weighted_variance.py │ │ │ ├── file_extension.py │ │ │ ├── full_name_to_first_name.py │ │ │ ├── full_name_to_last_name.py │ │ │ ├── 
full_name_to_title.py │ │ │ ├── is_in.py │ │ │ ├── is_null.py │ │ │ ├── latlong/ │ │ │ │ ├── __init__.py │ │ │ │ ├── cityblock_distance.py │ │ │ │ ├── geomidpoint.py │ │ │ │ ├── haversine.py │ │ │ │ ├── is_in_geobox.py │ │ │ │ ├── latitude.py │ │ │ │ ├── longitude.py │ │ │ │ └── utils.py │ │ │ ├── natural_language/ │ │ │ │ ├── __init__.py │ │ │ │ ├── constants.py │ │ │ │ ├── count_string.py │ │ │ │ ├── mean_characters_per_word.py │ │ │ │ ├── median_word_length.py │ │ │ │ ├── num_characters.py │ │ │ │ ├── num_unique_separators.py │ │ │ │ ├── num_words.py │ │ │ │ ├── number_of_common_words.py │ │ │ │ ├── number_of_hashtags.py │ │ │ │ ├── number_of_mentions.py │ │ │ │ ├── number_of_unique_words.py │ │ │ │ ├── number_of_words_in_quotes.py │ │ │ │ ├── punctuation_count.py │ │ │ │ ├── title_word_count.py │ │ │ │ ├── total_word_length.py │ │ │ │ ├── upper_case_count.py │ │ │ │ ├── upper_case_word_count.py │ │ │ │ └── whitespace_count.py │ │ │ ├── not_primitive.py │ │ │ ├── nth_week_of_month.py │ │ │ ├── numeric/ │ │ │ │ ├── __init__.py │ │ │ │ ├── absolute.py │ │ │ │ ├── cosine.py │ │ │ │ ├── diff.py │ │ │ │ ├── natural_logarithm.py │ │ │ │ ├── negate.py │ │ │ │ ├── percentile.py │ │ │ │ ├── rate_of_change.py │ │ │ │ ├── same_as_previous.py │ │ │ │ ├── sine.py │ │ │ │ ├── square_root.py │ │ │ │ └── tangent.py │ │ │ ├── percent_change.py │ │ │ ├── postal/ │ │ │ │ ├── __init__.py │ │ │ │ ├── one_digit_postal_code.py │ │ │ │ └── two_digit_postal_code.py │ │ │ ├── savgol_filter.py │ │ │ ├── time_series/ │ │ │ │ ├── __init__.py │ │ │ │ ├── expanding/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── expanding_count.py │ │ │ │ │ ├── expanding_max.py │ │ │ │ │ ├── expanding_mean.py │ │ │ │ │ ├── expanding_min.py │ │ │ │ │ ├── expanding_std.py │ │ │ │ │ └── expanding_trend.py │ │ │ │ ├── lag.py │ │ │ │ ├── numeric_lag.py │ │ │ │ ├── rolling_count.py │ │ │ │ ├── rolling_max.py │ │ │ │ ├── rolling_mean.py │ │ │ │ ├── rolling_min.py │ │ │ │ ├── rolling_outlier_count.py │ │ │ │ ├── 
rolling_std.py │ │ │ │ ├── rolling_trend.py │ │ │ │ └── utils.py │ │ │ └── url/ │ │ │ ├── __init__.py │ │ │ ├── url_to_domain.py │ │ │ ├── url_to_protocol.py │ │ │ └── url_to_tld.py │ │ └── utils.py │ ├── selection/ │ │ ├── __init__.py │ │ ├── api.py │ │ └── selection.py │ ├── synthesis/ │ │ ├── __init__.py │ │ ├── api.py │ │ ├── deep_feature_synthesis.py │ │ ├── dfs.py │ │ ├── encode_features.py │ │ ├── get_valid_primitives.py │ │ └── utils.py │ ├── tests/ │ │ ├── __init__.py │ │ ├── computational_backend/ │ │ │ ├── __init__.py │ │ │ ├── test_calculate_feature_matrix.py │ │ │ ├── test_feature_set.py │ │ │ ├── test_feature_set_calculator.py │ │ │ └── test_utils.py │ │ ├── config_tests/ │ │ │ ├── __init__.py │ │ │ └── test_config.py │ │ ├── conftest.py │ │ ├── demo_tests/ │ │ │ ├── __init__.py │ │ │ └── test_demo_data.py │ │ ├── entityset_tests/ │ │ │ ├── __init__.py │ │ │ ├── test_es.py │ │ │ ├── test_es_metadata.py │ │ │ ├── test_last_time_index.py │ │ │ ├── test_plotting.py │ │ │ ├── test_relationship.py │ │ │ ├── test_serialization.py │ │ │ ├── test_timedelta.py │ │ │ └── test_ww_es.py │ │ ├── entry_point_tests/ │ │ │ ├── __init__.py │ │ │ ├── add-ons/ │ │ │ │ ├── __init__.py │ │ │ │ ├── featuretools_plugin/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── featuretools_plugin/ │ │ │ │ │ │ └── __init__.py │ │ │ │ │ └── setup.py │ │ │ │ └── featuretools_primitives/ │ │ │ │ ├── __init__.py │ │ │ │ ├── featuretools_primitives/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── existing_primitive.py │ │ │ │ │ ├── invalid_primitive.py │ │ │ │ │ └── new_primitive.py │ │ │ │ └── setup.py │ │ │ ├── test_plugin.py │ │ │ ├── test_primitives.py │ │ │ └── utils.py │ │ ├── feature_discovery/ │ │ │ ├── __init__.py │ │ │ ├── test_convertors.py │ │ │ ├── test_feature_collection.py │ │ │ ├── test_feature_discovery.py │ │ │ └── test_type_defs.py │ │ ├── primitive_tests/ │ │ │ ├── __init__.py │ │ │ ├── aggregation_primitive_tests/ │ │ │ │ ├── __init__.py │ │ │ │ ├── test_agg_primitives.py │ │ │ │ ├── 
test_count_aggregation_primitives.py │ │ │ │ ├── test_max_consecutive.py │ │ │ │ ├── test_num_consecutive.py │ │ │ │ ├── test_percent_true.py │ │ │ │ ├── test_rolling_primitive.py │ │ │ │ └── test_time_since.py │ │ │ ├── bad_primitive_files/ │ │ │ │ ├── __init__.py │ │ │ │ ├── multiple_primitives.py │ │ │ │ └── no_primitives.py │ │ │ ├── natural_language_primitives_tests/ │ │ │ │ ├── __init__.py │ │ │ │ ├── test_count_string.py │ │ │ │ ├── test_mean_characters_per_word.py │ │ │ │ ├── test_median_word_length.py │ │ │ │ ├── test_natural_language_primitives_terminate.py │ │ │ │ ├── test_num_characters.py │ │ │ │ ├── test_num_unique_separators.py │ │ │ │ ├── test_num_words.py │ │ │ │ ├── test_number_of_common_words.py │ │ │ │ ├── test_number_of_hashtags.py │ │ │ │ ├── test_number_of_mentions.py │ │ │ │ ├── test_number_of_unique_words.py │ │ │ │ ├── test_number_of_words_in_quotes.py │ │ │ │ ├── test_punctuation_count.py │ │ │ │ ├── test_title_word_count.py │ │ │ │ ├── test_total_word_length.py │ │ │ │ ├── test_upper_case_count.py │ │ │ │ ├── test_upper_case_word_count.py │ │ │ │ └── test_whitespace_count.py │ │ │ ├── primitives_to_install/ │ │ │ │ ├── __init__.py │ │ │ │ ├── custom_max.py │ │ │ │ ├── custom_mean.py │ │ │ │ └── custom_sum.py │ │ │ ├── test_absolute_diff.py │ │ │ ├── test_agg_feats.py │ │ │ ├── test_all_primitive_docstrings.py │ │ │ ├── test_direct_features.py │ │ │ ├── test_feature_base.py │ │ │ ├── test_feature_descriptions.py │ │ │ ├── test_feature_serialization.py │ │ │ ├── test_feature_utils.py │ │ │ ├── test_feature_visualizer.py │ │ │ ├── test_features_deserializer.py │ │ │ ├── test_features_serializer.py │ │ │ ├── test_groupby_transform_primitives.py │ │ │ ├── test_identity_features.py │ │ │ ├── test_overrides.py │ │ │ ├── test_primitive_base.py │ │ │ ├── test_primitive_utils.py │ │ │ ├── test_rolling_primitive_utils.py │ │ │ ├── test_transform_features.py │ │ │ ├── transform_primitive_tests/ │ │ │ │ ├── __init__.py │ │ │ │ ├── 
test_cumulative_time_since.py │ │ │ │ ├── test_datetoholiday_primitive.py │ │ │ │ ├── test_distancetoholiday_primitive.py │ │ │ │ ├── test_expanding_primitives.py │ │ │ │ ├── test_exponential_primitives.py │ │ │ │ ├── test_full_name_primitives.py │ │ │ │ ├── test_is_federal_holiday.py │ │ │ │ ├── test_latlong_primitives.py │ │ │ │ ├── test_percent_change.py │ │ │ │ ├── test_percent_unique.py │ │ │ │ ├── test_postal_primitives.py │ │ │ │ ├── test_same_as_previous.py │ │ │ │ ├── test_savgol_filter.py │ │ │ │ ├── test_season.py │ │ │ │ └── test_transform_primitive.py │ │ │ └── utils.py │ │ ├── profiling/ │ │ │ ├── __init__.py │ │ │ └── dfs_profile.py │ │ ├── requirement_files/ │ │ │ ├── latest_requirements.txt │ │ │ ├── minimum_core_requirements.txt │ │ │ ├── minimum_dask_requirements.txt │ │ │ └── minimum_test_requirements.txt │ │ ├── selection/ │ │ │ ├── __init__.py │ │ │ └── test_selection.py │ │ ├── synthesis/ │ │ │ ├── __init__.py │ │ │ ├── test_deep_feature_synthesis.py │ │ │ ├── test_dfs_method.py │ │ │ ├── test_encode_features.py │ │ │ └── test_get_valid_primitives.py │ │ ├── test_version.py │ │ ├── testing_utils/ │ │ │ ├── __init__.py │ │ │ ├── cluster.py │ │ │ ├── es_utils.py │ │ │ ├── features.py │ │ │ ├── generate_fake_dataframe.py │ │ │ └── mock_ds.py │ │ └── utils_tests/ │ │ ├── __init__.py │ │ ├── test_config.py │ │ ├── test_description_utils.py │ │ ├── test_entry_point.py │ │ ├── test_gen_utils.py │ │ ├── test_recommend_primitives.py │ │ ├── test_time_utils.py │ │ ├── test_trie.py │ │ └── test_utils_info.py │ ├── utils/ │ │ ├── __init__.py │ │ ├── api.py │ │ ├── common_tld_utils.py │ │ ├── description_utils.py │ │ ├── entry_point.py │ │ ├── gen_utils.py │ │ ├── plot_utils.py │ │ ├── recommend_primitives.py │ │ ├── s3_utils.py │ │ ├── schema_utils.py │ │ ├── time_utils.py │ │ ├── trie.py │ │ ├── utils_info.py │ │ └── wrangle.py │ └── version.py ├── pyproject.toml └── release.md
Showing preview only (241K chars total). Download the full file or copy to clipboard to get everything.
SYMBOL INDEX (2634 symbols across 365 files)
FILE: docs/notebook_version_standardizer.py
function _get_ipython_notebooks (line 9) | def _get_ipython_notebooks(docs_source):
function _check_delete_empty_cell (line 21) | def _check_delete_empty_cell(notebook, delete=True):
function _check_execution_and_output (line 38) | def _check_execution_and_output(notebook):
function _check_python_version (line 49) | def _check_python_version(notebook, default_version):
function _fix_python_version (line 57) | def _fix_python_version(notebook, default_version):
function _fix_execution_and_output (line 65) | def _fix_execution_and_output(notebook):
function _get_notebooks_with_executions_and_empty (line 78) | def _get_notebooks_with_executions_and_empty(notebooks, default_version=...
function _fix_versions (line 92) | def _fix_versions(notebooks, default_version="3.9.2"):
function _remove_notebook_empty_last_cell (line 97) | def _remove_notebook_empty_last_cell(notebooks):
function _standardize_outputs (line 102) | def _standardize_outputs(notebooks):
function cli (line 108) | def cli():
function standardize (line 113) | def standardize():
function check_execution (line 138) | def check_execution():
FILE: docs/source/conf.py
function setup (line 401) | def setup(app):
FILE: docs/source/setup.py
function load_feature_plots (line 6) | def load_feature_plots():
FILE: featuretools/computational_backends/calculate_feature_matrix.py
function calculate_feature_matrix (line 51) | def calculate_feature_matrix(
function calculate_chunk (line 372) | def calculate_chunk(
function approximate_features (line 578) | def approximate_features(
function scatter_warning (line 691) | def scatter_warning(num_scattered_workers, num_workers):
function parallel_calculate_chunks (line 697) | def parallel_calculate_chunks(
function _add_approx_dataframe_index_col (line 843) | def _add_approx_dataframe_index_col(es, target_dataframe_name, cutoffs, ...
function _chunk_dataframe_groups (line 876) | def _chunk_dataframe_groups(grouped, chunk_size):
function _handle_chunk_size (line 887) | def _handle_chunk_size(chunk_size, total_size):
function update_progress_callback_parameters (line 899) | def update_progress_callback_parameters(progress_bar, previous_progress):
function init_ww_and_concat_fm (line 906) | def init_ww_and_concat_fm(feature_matrix, ww_init_kwargs):
FILE: featuretools/computational_backends/feature_set.py
class FeatureSet (line 17) | class FeatureSet(object):
method __init__ (line 23) | def __init__(self, features, approximate_feature_trie=None):
method feature_trie (line 69) | def feature_trie(self):
method _build_feature_trie (line 88) | def _build_feature_trie(self):
method _add_feature_to_trie (line 102) | def _add_feature_to_trie(
method group_features (line 157) | def group_features(self, feature_names):
method _get_feature_depths (line 185) | def _get_feature_depths(self, features):
method uses_full_dataframe (line 211) | def uses_full_dataframe(self, feature, check_dependents=False):
method _dependent_uses_full_dataframe (line 219) | def _dependent_uses_full_dataframe(self, feature):
function _get_use_previous (line 229) | def _get_use_previous(
function _get_where (line 243) | def _get_where(f):
function _get_groupby (line 250) | def _get_groupby(f):
FILE: featuretools/computational_backends/feature_set_calculator.py
class FeatureSetCalculator (line 21) | class FeatureSetCalculator(object):
method __init__ (line 26) | def __init__(
method run (line 68) | def run(self, instance_ids, progress_callback=None, include_cutoff_tim...
method _calculate_features_for_dataframe (line 163) | def _calculate_features_for_dataframe(
method _calculate_features (line 360) | def _calculate_features(self, df, df_trie, features, progress_callback):
method _add_ancestor_relationship_columns (line 373) | def _add_ancestor_relationship_columns(
method generate_default_df (line 433) | def generate_default_df(self, instance_ids, extra_columns=None):
method _feature_type_handler (line 456) | def _feature_type_handler(self, f):
method _calculate_identity_features (line 470) | def _calculate_identity_features(self, features, df, _df_trie, progres...
method _calculate_transform_features (line 480) | def _calculate_transform_features(
method _calculate_groupby_features (line 526) | def _calculate_groupby_features(self, features, frame, _df_trie, progr...
method _calculate_direct_features (line 594) | def _calculate_direct_features(
method _calculate_agg_features (line 652) | def _calculate_agg_features(self, features, frame, df_trie, progress_c...
method _necessary_columns (line 822) | def _necessary_columns(self, dataframe_name, feature_names):
function _can_agg (line 839) | def _can_agg(feature):
function agg_wrapper (line 855) | def agg_wrapper(feats, time_last):
function update_feature_columns (line 880) | def update_feature_columns(feature_data, data):
function strip_values_if_series (line 897) | def strip_values_if_series(values):
FILE: featuretools/computational_backends/utils.py
function bin_cutoff_times (line 22) | def bin_cutoff_times(cutoff_time, bin_size):
function save_csv_decorator (line 37) | def save_csv_decorator(save_progress=None):
function datetime_round (line 61) | def datetime_round(dt, freq):
function gather_approximate_features (line 85) | def gather_approximate_features(feature_set):
function gen_empty_approx_features_df (line 118) | def gen_empty_approx_features_df(approx_features):
function n_jobs_to_workers (line 124) | def n_jobs_to_workers(n_jobs):
function create_client_and_cluster (line 141) | def create_client_and_cluster(n_jobs, dask_kwargs, entityset_size):
function get_client_cluster (line 211) | def get_client_cluster():
function _validate_cutoff_time (line 225) | def _validate_cutoff_time(
function _check_cutoff_time_type (line 300) | def _check_cutoff_time_type(cutoff_time, es_time_type):
function replace_inf_values (line 326) | def replace_inf_values(feature_matrix, replacement_value=np.nan, columns...
function get_ww_types_from_features (line 349) | def get_ww_types_from_features(
FILE: featuretools/config_init.py
function initialize_logging (line 7) | def initialize_logging():
class Config (line 50) | class Config:
method __init__ (line 51) | def __init__(self):
method set_to_default (line 55) | def set_to_default(self):
method get (line 62) | def get(self, key):
method get_all (line 65) | def get_all(self):
method set (line 68) | def set(self, values):
FILE: featuretools/demo/flight.py
function load_flight (line 11) | def load_flight(
function make_es (line 99) | def make_es(data):
function _clean_data (line 163) | def _clean_data(data):
function _fill_labels (line 262) | def _fill_labels(clean_data):
function _reconstruct_times (line 283) | def _reconstruct_times(clean_data):
function filter_data (line 307) | def filter_data(clean_data, month_filter=None, categorical_filter=None):
function convert (line 323) | def convert(name):
function get_flight_filename (line 330) | def get_flight_filename(demo=True):
FILE: featuretools/demo/mock_customer.py
function load_mock_customer (line 9) | def load_mock_customer(
FILE: featuretools/demo/retail.py
function load_retail (line 7) | def load_retail(id="demo_retail_data", nrows=None, return_single_table=F...
FILE: featuretools/demo/weather.py
function load_weather (line 6) | def load_weather(nrows=None, return_single_table=False):
function make_es (line 29) | def make_es(data):
FILE: featuretools/entityset/deserialize.py
function description_to_entityset (line 17) | def description_to_entityset(description, **kwargs):
function empty_dataframe (line 56) | def empty_dataframe(description):
function read_data_description (line 122) | def read_data_description(path):
function read_entityset (line 141) | def read_entityset(path, profile_name=None, **kwargs):
FILE: featuretools/entityset/entityset.py
class EntitySet (line 28) | class EntitySet(object):
method __init__ (line 43) | def __init__(self, id=None, dataframes=None, relationships=None):
method __sizeof__ (line 116) | def __sizeof__(self):
method __dask_tokenize__ (line 119) | def __dask_tokenize__(self):
method __eq__ (line 122) | def __eq__(self, other, deep=False):
method __ne__ (line 141) | def __ne__(self, other, deep=False):
method __getitem__ (line 144) | def __getitem__(self, dataframe_name):
method __deepcopy__ (line 159) | def __deepcopy__(self, memo):
method dataframes (line 179) | def dataframes(self):
method metadata (line 183) | def metadata(self):
method reset_data_description (line 191) | def reset_data_description(self):
method to_pickle (line 194) | def to_pickle(self, path, compression=None, profile_name=None):
method to_parquet (line 213) | def to_parquet(self, path, engine="auto", compression=None, profile_na...
method to_csv (line 234) | def to_csv(
method to_dictionary (line 268) | def to_dictionary(self):
method __repr__ (line 275) | def __repr__(self):
method add_relationships (line 302) | def add_relationships(self, relationships):
method add_relationship (line 318) | def add_relationship(
method set_secondary_time_index (line 407) | def set_secondary_time_index(self, dataframe_name, secondary_time_index):
method _set_secondary_time_index (line 420) | def _set_secondary_time_index(self, dataframe, secondary_time_index):
method find_forward_paths (line 433) | def find_forward_paths(self, start_dataframe_name, goal_dataframe_name):
method find_backward_paths (line 451) | def find_backward_paths(self, start_dataframe_name, goal_dataframe_name):
method _forward_dataframe_paths (line 467) | def _forward_dataframe_paths(self, start_dataframe_name, seen_datafram...
method get_forward_dataframes (line 496) | def get_forward_dataframes(self, dataframe_name, deep=False):
method get_backward_dataframes (line 519) | def get_backward_dataframes(self, dataframe_name, deep=False):
method get_forward_relationships (line 542) | def get_forward_relationships(self, dataframe_name):
method get_backward_relationships (line 555) | def get_backward_relationships(self, dataframe_name):
method has_unique_forward_path (line 569) | def has_unique_forward_path(self, start_dataframe_name, end_dataframe_...
method add_dataframe (line 586) | def add_dataframe(
method __setitem__ (line 747) | def __setitem__(self, key, value):
method normalize_dataframe (line 750) | def normalize_dataframe(
method concat (line 1007) | def concat(self, other, inplace=False):
method add_last_time_indexes (line 1063) | def add_last_time_indexes(self, updated_dataframes=None):
method __getstate__ (line 1248) | def __getstate__(self):
method __setstate__ (line 1256) | def __setstate__(self, state):
method add_interesting_values (line 1267) | def add_interesting_values(
method plot (line 1339) | def plot(self, to_file=None):
method _handle_time (line 1407) | def _handle_time(
method query_by_values (line 1459) | def query_by_values(
method replace_dataframe (line 1536) | def replace_dataframe(
method _check_time_indexes (line 1612) | def _check_time_indexes(self):
method _check_secondary_time_index (line 1617) | def _check_secondary_time_index(self, dataframe, secondary_time_index=...
method _check_uniform_time_index (line 1633) | def _check_uniform_time_index(self, dataframe, column_name=None):
method _get_time_type (line 1645) | def _get_time_type(self, dataframe, column_name=None):
method _add_references_to_metadata (line 1661) | def _add_references_to_metadata(self, dataframe):
method _normalize_values (line 1669) | def _normalize_values(self, dataframe):
function _vals_to_series (line 1682) | def _vals_to_series(instance_vals, column_id):
function _get_or_create_index (line 1709) | def _get_or_create_index(index, make_index, df):
function _create_index (line 1745) | def _create_index(df, index):
FILE: featuretools/entityset/relationship.py
class Relationship (line 1) | class Relationship(object):
method __init__ (line 8) | def __init__(
method from_dictionary (line 42) | def from_dictionary(cls, arguments, es):
method __repr__ (line 49) | def __repr__(self):
method __eq__ (line 59) | def __eq__(self, other):
method __hash__ (line 70) | def __hash__(self):
method parent_dataframe (line 81) | def parent_dataframe(self):
method child_dataframe (line 86) | def child_dataframe(self):
method parent_column (line 91) | def parent_column(self):
method child_column (line 96) | def child_column(self):
method parent_name (line 101) | def parent_name(self):
method child_name (line 109) | def child_name(self):
method to_dictionary (line 116) | def to_dictionary(self):
method _is_unique (line 124) | def _is_unique(self):
class RelationshipPath (line 141) | class RelationshipPath(object):
method __init__ (line 142) | def __init__(self, relationships_with_direction):
method name (line 146) | def name(self):
method dataframes (line 154) | def dataframes(self):
method __add__ (line 170) | def __add__(self, other):
method __getitem__ (line 175) | def __getitem__(self, index):
method __iter__ (line 178) | def __iter__(self):
method __len__ (line 182) | def __len__(self):
method __eq__ (line 185) | def __eq__(self, other):
method __ne__ (line 192) | def __ne__(self, other):
method __repr__ (line 195) | def __repr__(self):
function _direction_name (line 203) | def _direction_name(is_forward, relationship):
FILE: featuretools/entityset/serialize.py
function entityset_to_description (line 16) | def entityset_to_description(entityset, format=None):
function write_data_description (line 44) | def write_data_description(entityset, path, profile_name=None, **kwargs):
function dump_data_description (line 75) | def dump_data_description(entityset, path, **kwargs):
function create_archive (line 87) | def create_archive(tmpdir):
FILE: featuretools/entityset/timedelta.py
class Timedelta (line 5) | class Timedelta(object):
method __init__ (line 48) | def __init__(self, value, unit=None, delta_obj=None):
method from_dictionary (line 67) | def from_dictionary(cls, dictionary):
method make_singular (line 79) | def make_singular(cls, s):
method _check_unit_plural (line 85) | def _check_unit_plural(cls, s):
method get_value (line 92) | def get_value(self, unit=None):
method get_units (line 100) | def get_units(self):
method get_unit_type (line 103) | def get_unit_type(self):
method check_value (line 113) | def check_value(self, value, unit):
method fix_units (line 124) | def fix_units(self):
method lower_readable_times (line 133) | def lower_readable_times(self):
method get_name (line 140) | def get_name(self):
method get_arguments (line 154) | def get_arguments(self):
method is_absolute (line 165) | def is_absolute(self):
method has_no_observations (line 171) | def has_no_observations(self):
method has_multiple_units (line 177) | def has_multiple_units(self):
method __eq__ (line 183) | def __eq__(self, other):
method __neg__ (line 189) | def __neg__(self):
method __radd__ (line 199) | def __radd__(self, time):
method __rsub__ (line 206) | def __rsub__(self, time):
FILE: featuretools/exceptions.py
class UnknownFeature (line 1) | class UnknownFeature(Exception):
method __init__ (line 2) | def __init__(self, *args, **kwargs):
class UnusedPrimitiveWarning (line 6) | class UnusedPrimitiveWarning(UserWarning):
FILE: featuretools/feature_base/cache.py
class CacheType (line 16) | class CacheType(Enum):
class FeatureCache (line 24) | class FeatureCache:
method get (line 30) | def get(
method add (line 48) | def add(self, cache_type: CacheType, hashkey: int, payload: Any):
method clear_all (line 59) | def clear_all(self):
FILE: featuretools/feature_base/feature_base.py
class FeatureBase (line 18) | class FeatureBase(object):
method __init__ (line 19) | def __init__(
method __getitem__ (line 63) | def __getitem__(self, key):
method from_dictionary (line 73) | def from_dictionary(cls, arguments, entityset, dependencies, primitive):
method rename (line 76) | def rename(self, name):
method copy (line 84) | def copy(self):
method get_name (line 87) | def get_name(self):
method get_feature_names (line 92) | def get_feature_names(self):
method set_feature_names (line 105) | def set_feature_names(self, names):
method get_function (line 133) | def get_function(self, **kwargs):
method get_dependencies (line 136) | def get_dependencies(self, deep=False, ignored=None, copy=True):
method get_depth (line 165) | def get_depth(self, stop_at=None):
method _check_input_types (line 177) | def _check_input_types(self):
method dataframe (line 195) | def dataframe(self):
method number_output_features (line 200) | def number_output_features(self):
method __repr__ (line 203) | def __repr__(self):
method hash (line 206) | def hash(self):
method __hash__ (line 209) | def __hash__(self):
method column_schema (line 213) | def column_schema(self):
method default_value (line 256) | def default_value(self):
method get_arguments (line 259) | def get_arguments(self):
method to_dictionary (line 262) | def to_dictionary(self):
method _handle_binary_comparison (line 269) | def _handle_binary_comparison(self, other, Primitive, PrimitiveScalar):
method __eq__ (line 275) | def __eq__(self, other):
method __ne__ (line 283) | def __ne__(self, other):
method __gt__ (line 291) | def __gt__(self, other):
method __ge__ (line 299) | def __ge__(self, other):
method __lt__ (line 307) | def __lt__(self, other):
method __le__ (line 315) | def __le__(self, other):
method __add__ (line 323) | def __add__(self, other):
method __radd__ (line 331) | def __radd__(self, other):
method __sub__ (line 334) | def __sub__(self, other):
method __rsub__ (line 342) | def __rsub__(self, other):
method __div__ (line 345) | def __div__(self, other):
method __truediv__ (line 353) | def __truediv__(self, other):
method __rtruediv__ (line 356) | def __rtruediv__(self, other):
method __rdiv__ (line 359) | def __rdiv__(self, other):
method __mul__ (line 362) | def __mul__(self, other):
method __rmul__ (line 394) | def __rmul__(self, other):
method __mod__ (line 397) | def __mod__(self, other):
method __rmod__ (line 405) | def __rmod__(self, other):
method __and__ (line 408) | def __and__(self, other):
method __rand__ (line 411) | def __rand__(self, other):
method __or__ (line 414) | def __or__(self, other):
method __ror__ (line 417) | def __ror__(self, other):
method __not__ (line 420) | def __not__(self, other):
method __abs__ (line 423) | def __abs__(self):
method __neg__ (line 426) | def __neg__(self):
method AND (line 429) | def AND(self, other_feature):
method OR (line 433) | def OR(self, other_feature):
method NOT (line 437) | def NOT(self):
method isin (line 441) | def isin(self, list_of_output):
method is_null (line 447) | def is_null(self):
method __invert__ (line 451) | def __invert__(self):
method unique_name (line 454) | def unique_name(self):
method relationship_path_name (line 457) | def relationship_path_name(self):
class IdentityFeature (line 461) | class IdentityFeature(FeatureBase):
method __init__ (line 464) | def __init__(self, column, name=None):
method from_dictionary (line 479) | def from_dictionary(cls, arguments, entityset, dependencies, primitive):
method copy (line 485) | def copy(self):
method generate_name (line 489) | def generate_name(self):
method get_depth (line 492) | def get_depth(self, stop_at=None):
method get_arguments (line 495) | def get_arguments(self):
method column_schema (line 503) | def column_schema(self):
class DirectFeature (line 507) | class DirectFeature(FeatureBase):
method __init__ (line 514) | def __init__(
method _handle_relationship (line 537) | def _handle_relationship(self, entityset, child_dataframe_name, relati...
method from_dictionary (line 576) | def from_dictionary(cls, arguments, entityset, dependencies, primitive):
method number_output_features (line 591) | def number_output_features(self):
method default_value (line 595) | def default_value(self):
method copy (line 598) | def copy(self):
method column_schema (line 608) | def column_schema(self):
method generate_name (line 611) | def generate_name(self):
method generate_names (line 614) | def generate_names(self):
method get_arguments (line 620) | def get_arguments(self):
method _name_from_base (line 628) | def _name_from_base(self, base_name):
class AggregationFeature (line 632) | class AggregationFeature(FeatureBase):
method __init__ (line 641) | def __init__(
method _handle_relationship_path (line 696) | def _handle_relationship_path(
method from_dictionary (line 751) | def from_dictionary(cls, arguments, entityset, dependencies, primitive):
method copy (line 780) | def copy(self):
method _where_str (line 790) | def _where_str(self):
method _use_prev_str (line 797) | def _use_prev_str(self):
method generate_name (line 804) | def generate_name(self):
method generate_names (line 813) | def generate_names(self):
method get_arguments (line 822) | def get_arguments(self):
method relationship_path_name (line 835) | def relationship_path_name(self):
class TransformFeature (line 842) | class TransformFeature(FeatureBase):
method __init__ (line 843) | def __init__(self, base_features, primitive, name=None):
method from_dictionary (line 859) | def from_dictionary(cls, arguments, entityset, dependencies, primitive):
method copy (line 869) | def copy(self):
method generate_name (line 872) | def generate_name(self):
method generate_names (line 877) | def generate_names(self):
method get_arguments (line 882) | def get_arguments(self):
class GroupByTransformFeature (line 893) | class GroupByTransformFeature(TransformFeature):
method __init__ (line 894) | def __init__(self, base_features, primitive, groupby, name=None):
method from_dictionary (line 912) | def from_dictionary(cls, arguments, entityset, dependencies, primitive):
method copy (line 924) | def copy(self):
method generate_name (line 933) | def generate_name(self):
method generate_names (line 940) | def generate_names(self):
method get_arguments (line 946) | def get_arguments(self):
class Feature (line 964) | class Feature(object):
method __new__ (line 969) | def __new__(
class FeatureOutputSlice (line 1012) | class FeatureOutputSlice(FeatureBase):
method __init__ (line 1017) | def __init__(self, base_feature, n, name=None):
method __getitem__ (line 1040) | def __getitem__(self, key):
method generate_name (line 1043) | def generate_name(self):
method number_output_features (line 1047) | def number_output_features(self):
method get_arguments (line 1050) | def get_arguments(self):
method from_dictionary (line 1058) | def from_dictionary(cls, arguments, entityset, dependencies, primitive):
method copy (line 1065) | def copy(self):
function _validate_base_features (line 1069) | def _validate_base_features(feature):
FILE: featuretools/feature_base/feature_descriptions.py
function describe_feature (line 6) | def describe_feature(
function generate_description (line 43) | def generate_description(feature, feature_descriptions, primitive_templa...
function get_direct_description (line 149) | def get_direct_description(feature):
function get_aggregation_groupby (line 170) | def get_aggregation_groupby(feature, feature_descriptions=None):
function parse_json_metadata (line 185) | def parse_json_metadata(file):
FILE: featuretools/feature_base/feature_visualizer.py
function graph_feature (line 35) | def graph_feature(feature, to_file=None, description=False, **kwargs):
function get_feature_data (line 139) | def get_feature_data(feat, dataframes, groupbys, edges, primitives, laye...
function add_dataframe (line 243) | def add_dataframe(dataframe, dataframe_dict):
function get_dataframe_table (line 252) | def get_dataframe_table(dataframe_name, dataframe_dict):
FILE: featuretools/feature_base/features_deserializer.py
function load_features (line 22) | def load_features(features, profile_name=None):
class FeaturesDeserializer (line 70) | class FeaturesDeserializer(object):
method __init__ (line 82) | def __init__(self, features_dict):
method load (line 95) | def load(cls, features, profile_name):
method to_list (line 114) | def to_list(self):
method _deserialize_feature (line 118) | def _deserialize_feature(self, feature_name):
method _check_schema_version (line 146) | def _check_schema_version(self):
FILE: featuretools/feature_base/features_serializer.py
function save_features (line 10) | def save_features(features, location=None, profile_name=None):
class FeaturesSerializer (line 65) | class FeaturesSerializer(object):
method __init__ (line 66) | def __init__(self, feature_list):
method to_dict (line 70) | def to_dict(self):
method save (line 85) | def save(self, location, profile_name):
method _feature_definitions (line 106) | def _feature_definitions(self):
method _serialize_feature (line 140) | def _serialize_feature(self, feature):
FILE: featuretools/feature_base/utils.py
function is_valid_input (line 1) | def is_valid_input(candidate, template):
FILE: featuretools/feature_discovery/FeatureCollection.py
class FeatureCollection (line 18) | class FeatureCollection:
method __init__ (line 19) | def __init__(self, features: List[LiteFeature]):
method sort_features (line 25) | def sort_features(self):
method __repr__ (line 30) | def __repr__(self):
method all_features (line 34) | def all_features(self):
method hash_key (line 38) | def hash_key(self) -> str:
method _set_hash (line 46) | def _set_hash(self):
method __hash__ (line 55) | def __hash__(self):
method __eq__ (line 58) | def __eq__(self, other: FeatureCollection) -> bool:
method reindex (line 61) | def reindex(self) -> FeatureCollection:
method get_by_logical_type (line 100) | def get_by_logical_type(self, logical_type: Type[LogicalType]) -> Set[...
method get_by_tag (line 103) | def get_by_tag(self, tag: str) -> Set[LiteFeature]:
method get_by_origin_feature (line 106) | def get_by_origin_feature(self, origin_feature: LiteFeature) -> Set[Li...
method get_by_origin_feature_name (line 109) | def get_by_origin_feature_name(self, name: str) -> Union[LiteFeature, ...
method get_dependencies_by_origin_name (line 113) | def get_dependencies_by_origin_name(self, name) -> Set[LiteFeature]:
method get_by_key (line 119) | def get_by_key(self, key: str) -> List[LiteFeature]:
method flatten_features (line 122) | def flatten_features(self) -> Dict[str, LiteFeature]:
method flatten_primitives (line 133) | def flatten_primitives(self) -> Dict[str, Dict[str, Any]]:
method to_dict (line 146) | def to_dict(self):
method feature_to_keys (line 157) | def feature_to_keys(feature: LiteFeature) -> List[str]:
method from_dict (line 198) | def from_dict(input_dict):
FILE: featuretools/feature_discovery/LiteFeature.py
class LiteFeature (line 19) | class LiteFeature:
method __init__ (line 36) | def __init__(
method name (line 99) | def name(self):
method name (line 107) | def name(self, _):
method set_alias (line 110) | def set_alias(self, value: Union[str, None]):
method non_indexed_name (line 114) | def non_indexed_name(self):
method logical_type (line 120) | def logical_type(self):
method logical_type (line 124) | def logical_type(self, _):
method tags (line 128) | def tags(self):
method tags (line 132) | def tags(self, _):
method primitive (line 136) | def primitive(self):
method primitive (line 140) | def primitive(self, _):
method base_features (line 144) | def base_features(self):
method base_features (line 148) | def base_features(self, _):
method df_id (line 152) | def df_id(self):
method df_id (line 156) | def df_id(self, _):
method id (line 160) | def id(self):
method id (line 164) | def id(self, _):
method n_output_features (line 168) | def n_output_features(self):
method n_output_features (line 172) | def n_output_features(self, _):
method depth (line 176) | def depth(self):
method depth (line 180) | def depth(self, _):
method related_features (line 184) | def related_features(self):
method related_features (line 188) | def related_features(self, value: Set[LiteFeature]):
method idx (line 192) | def idx(self):
method idx (line 196) | def idx(self, _):
method hash (line 200) | def hash(
method __eq__ (line 233) | def __eq__(self, other: LiteFeature):
method __lt__ (line 236) | def __lt__(self, other: LiteFeature):
method __ne__ (line 239) | def __ne__(self, other):
method __hash__ (line 242) | def __hash__(self):
method _generate_hash (line 245) | def _generate_hash(self) -> str:
method get_primitive_name (line 254) | def get_primitive_name(self) -> Union[str, None]:
method get_dependencies (line 257) | def get_dependencies(self, deep=False) -> List[LiteFeature]:
method get_origin_features (line 270) | def get_origin_features(self) -> List[LiteFeature]:
method column_schema (line 275) | def column_schema(self) -> ColumnSchema:
method dependent_primitives (line 278) | def dependent_primitives(self) -> Set[Type[PrimitiveBase]]:
method to_dict (line 287) | def to_dict(self) -> Dict[str, Any]:
method is_multioutput (line 300) | def is_multioutput(self) -> bool:
method copy (line 303) | def copy(self) -> LiteFeature:
method __repr__ (line 319) | def __repr__(self) -> str:
FILE: featuretools/feature_discovery/convertors.py
function convert_featurebase_list_to_feature_list (line 20) | def convert_featurebase_list_to_feature_list(
function _feature_to_transform_feature (line 95) | def _feature_to_transform_feature(
function _convert_feature_to_featurebase (line 133) | def _convert_feature_to_featurebase(
function convert_feature_list_to_featurebase_list (line 185) | def convert_feature_list_to_featurebase_list(
FILE: featuretools/feature_discovery/feature_discovery.py
function _index_column_set (line 16) | def _index_column_set(column_set: List[ColumnSchema]) -> List[Tuple[str,...
function _get_features (line 45) | def _get_features(
function _primitive_to_columnsets (line 95) | def _primitive_to_columnsets(primitive: PrimitiveBase) -> List[List[Colu...
function _get_matching_features (line 119) | def _get_matching_features(
function _features_from_primitive (line 176) | def _features_from_primitive(
function schema_to_features (line 245) | def schema_to_features(schema: TableSchema) -> List[LiteFeature]:
function _check_inputs (line 298) | def _check_inputs(
function generate_features_from_primitives (line 330) | def generate_features_from_primitives(
FILE: featuretools/feature_discovery/utils.py
function column_schema_to_keys (line 20) | def column_schema_to_keys(column_schema: ColumnSchema) -> str:
function hash_primitive (line 51) | def hash_primitive(primitive: PrimitiveBase) -> Tuple[str, Dict[str, Any]]:
function get_primitive_return_type (line 62) | def get_primitive_return_type(primitive: PrimitiveBase) -> ColumnSchema:
function flatten_list (line 80) | def flatten_list(nested_list):
FILE: featuretools/primitives/__init__.py
function _load_primitives (line 19) | def _load_primitives():
FILE: featuretools/primitives/base/aggregation_primitive_base.py
class AggregationPrimitive (line 4) | class AggregationPrimitive(PrimitiveBase):
method generate_name (line 5) | def generate_name(
method generate_names (line 23) | def generate_names(
FILE: featuretools/primitives/base/primitive_base.py
class PrimitiveBase (line 11) | class PrimitiveBase(object):
method __init__ (line 49) | def __init__(self):
method __call__ (line 52) | def __call__(self, *args, **kwargs):
method __lt__ (line 60) | def __lt__(self, other):
method generate_name (line 65) | def generate_name(self):
method generate_names (line 68) | def generate_names(self):
method get_function (line 71) | def get_function(self):
method get_filepath (line 74) | def get_filepath(self, filename):
method get_args_string (line 77) | def get_args_string(self):
method get_arguments (line 91) | def get_arguments(self):
method get_description (line 111) | def get_description(
method flatten_nested_input_types (line 155) | def flatten_nested_input_types(input_types):
FILE: featuretools/primitives/base/transform_primitive_base.py
class TransformPrimitive (line 4) | class TransformPrimitive(PrimitiveBase):
method generate_name (line 12) | def generate_name(self, base_feature_names):
method generate_names (line 19) | def generate_names(self, base_feature_names):
FILE: featuretools/primitives/options_utils.py
function _get_primitive_options (line 11) | def _get_primitive_options():
function dict_to_list_column_check (line 25) | def dict_to_list_column_check(option, es):
function list_dataframe_check (line 45) | def list_dataframe_check(option, es):
function generate_all_primitive_options (line 56) | def generate_all_primitive_options(
function _init_primitive_options (line 131) | def _init_primitive_options(primitive_options, es):
function _init_option_dict (line 175) | def _init_option_dict(key, option_dict, es):
function column_filter (line 204) | def column_filter(f, options, groupby=False):
function ignore_dataframe_for_primitive (line 240) | def ignore_dataframe_for_primitive(options, dataframe, groupby=False):
function filter_groupby_matches_by_options (line 273) | def filter_groupby_matches_by_options(groupby_matches, options):
function filter_matches_by_options (line 281) | def filter_matches_by_options(matches, options, groupby=False, commutati...
FILE: featuretools/primitives/standard/aggregation/all_primitive.py
class All (line 8) | class All(AggregationPrimitive):
method get_function (line 30) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/any_primitive.py
class Any (line 8) | class Any(AggregationPrimitive):
method get_function (line 30) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/average_count_per_unique.py
class AverageCountPerUnique (line 7) | class AverageCountPerUnique(AggregationPrimitive):
method __init__ (line 42) | def __init__(self, skipna=True):
method get_function (line 45) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/avg_time_between.py
class AvgTimeBetween (line 12) | class AvgTimeBetween(AggregationPrimitive):
method __init__ (line 43) | def __init__(self, unit="seconds"):
method get_function (line 46) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/count.py
class Count (line 8) | class Count(AggregationPrimitive):
method get_function (line 24) | def get_function(self):
method generate_name (line 27) | def generate_name(
FILE: featuretools/primitives/standard/aggregation/count_above_mean.py
class CountAboveMean (line 8) | class CountAboveMean(AggregationPrimitive):
method __init__ (line 32) | def __init__(self, skipna=True):
method get_function (line 35) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/count_below_mean.py
class CountBelowMean (line 8) | class CountBelowMean(AggregationPrimitive):
method __init__ (line 32) | def __init__(self, skipna=True):
method get_function (line 35) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/count_greater_than.py
class CountGreaterThan (line 7) | class CountGreaterThan(AggregationPrimitive):
method __init__ (line 26) | def __init__(self, threshold=10):
method get_function (line 29) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/count_inside_nth_std.py
class CountInsideNthSTD (line 8) | class CountInsideNthSTD(AggregationPrimitive):
method __init__ (line 27) | def __init__(self, n=1):
method get_function (line 33) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/count_inside_range.py
class CountInsideRange (line 8) | class CountInsideRange(AggregationPrimitive):
method __init__ (line 36) | def __init__(self, lower=0, upper=1, skipna=True):
method get_function (line 41) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/count_less_than.py
class CountLessThan (line 7) | class CountLessThan(AggregationPrimitive):
method __init__ (line 26) | def __init__(self, threshold=10):
method get_function (line 29) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/count_outside_nth_std.py
class CountOutsideNthSTD (line 8) | class CountOutsideNthSTD(AggregationPrimitive):
method __init__ (line 27) | def __init__(self, n=1):
method get_function (line 33) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/count_outside_range.py
class CountOutsideRange (line 8) | class CountOutsideRange(AggregationPrimitive):
method __init__ (line 35) | def __init__(self, lower=0, upper=1, skipna=True):
method get_function (line 40) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/date_first_event.py
class DateFirstEvent (line 8) | class DateFirstEvent(AggregationPrimitive):
method get_function (line 27) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/entropy.py
class Entropy (line 7) | class Entropy(AggregationPrimitive):
method __init__ (line 34) | def __init__(self, dropna=False, base=None):
method get_function (line 38) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/first.py
class First (line 6) | class First(AggregationPrimitive):
method get_function (line 21) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/first_last_time_delta.py
class FirstLastTimeDelta (line 8) | class FirstLastTimeDelta(AggregationPrimitive):
method get_function (line 29) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/has_no_duplicates.py
class HasNoDuplicates (line 7) | class HasNoDuplicates(AggregationPrimitive):
method __init__ (line 44) | def __init__(self, skipna=True):
method get_function (line 47) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/is_monotonically_decreasing.py
class IsMonotonicallyDecreasing (line 7) | class IsMonotonicallyDecreasing(AggregationPrimitive):
method get_function (line 27) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/is_monotonically_increasing.py
class IsMonotonicallyIncreasing (line 7) | class IsMonotonicallyIncreasing(AggregationPrimitive):
method get_function (line 27) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/is_unique.py
class IsUnique (line 7) | class IsUnique(AggregationPrimitive):
method get_function (line 33) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/kurtosis.py
class Kurtosis (line 8) | class Kurtosis(AggregationPrimitive):
method __init__ (line 58) | def __init__(self, fisher=True, bias=True, nan_policy="propagate"):
method get_function (line 65) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/last.py
class Last (line 6) | class Last(AggregationPrimitive):
method get_function (line 21) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/max_consecutive_false.py
class MaxConsecutiveFalse (line 7) | class MaxConsecutiveFalse(AggregationPrimitive):
method get_function (line 22) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/max_consecutive_negatives.py
class MaxConsecutiveNegatives (line 7) | class MaxConsecutiveNegatives(AggregationPrimitive):
method __init__ (line 34) | def __init__(self, skipna=True):
method get_function (line 37) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/max_consecutive_positives.py
class MaxConsecutivePositives (line 7) | class MaxConsecutivePositives(AggregationPrimitive):
method __init__ (line 34) | def __init__(self, skipna=True):
method get_function (line 37) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/max_consecutive_true.py
class MaxConsecutiveTrue (line 7) | class MaxConsecutiveTrue(AggregationPrimitive):
method get_function (line 22) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/max_consecutive_zeros.py
class MaxConsecutiveZeros (line 7) | class MaxConsecutiveZeros(AggregationPrimitive):
method __init__ (line 34) | def __init__(self, skipna=True):
method get_function (line 37) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/max_count.py
class MaxCount (line 7) | class MaxCount(AggregationPrimitive):
method __init__ (line 32) | def __init__(self, skipna=True):
method get_function (line 35) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/max_min_delta.py
class MaxMinDelta (line 6) | class MaxMinDelta(AggregationPrimitive):
method __init__ (line 31) | def __init__(self, skipna=True):
method get_function (line 34) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/max_primitive.py
class Max (line 7) | class Max(AggregationPrimitive):
method get_function (line 22) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/mean.py
class Mean (line 7) | class Mean(AggregationPrimitive):
method __init__ (line 31) | def __init__(self, skipna=True):
method get_function (line 34) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/median.py
class Median (line 7) | class Median(AggregationPrimitive):
method get_function (line 26) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/median_count.py
class MedianCount (line 8) | class MedianCount(AggregationPrimitive):
method __init__ (line 35) | def __init__(self, skipna=True):
method get_function (line 38) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/min_count.py
class MinCount (line 8) | class MinCount(AggregationPrimitive):
method __init__ (line 33) | def __init__(self, skipna=True):
method get_function (line 36) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/min_primitive.py
class Min (line 7) | class Min(AggregationPrimitive):
method get_function (line 22) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/mode.py
class Mode (line 7) | class Mode(AggregationPrimitive):
method get_function (line 26) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/n_most_common.py
class NMostCommon (line 7) | class NMostCommon(AggregationPrimitive):
method __init__ (line 31) | def __init__(self, n=3):
method get_function (line 40) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/n_most_common_frequency.py
class NMostCommonFrequency (line 9) | class NMostCommonFrequency(AggregationPrimitive):
method __init__ (line 50) | def __init__(self, n=3, skipna=True):
method get_function (line 55) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/n_unique_days.py
class NUniqueDays (line 7) | class NUniqueDays(AggregationPrimitive):
method get_function (line 33) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/n_unique_days_of_calendar_year.py
class NUniqueDaysOfCalendarYear (line 7) | class NUniqueDaysOfCalendarYear(AggregationPrimitive):
method get_function (line 32) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/n_unique_days_of_month.py
class NUniqueDaysOfMonth (line 7) | class NUniqueDaysOfMonth(AggregationPrimitive):
method get_function (line 34) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/n_unique_months.py
class NUniqueMonths (line 7) | class NUniqueMonths(AggregationPrimitive):
method get_function (line 34) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/n_unique_weeks.py
class NUniqueWeeks (line 7) | class NUniqueWeeks(AggregationPrimitive):
method get_function (line 35) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/num_consecutive_greater_mean.py
class NumConsecutiveGreaterMean (line 8) | class NumConsecutiveGreaterMean(AggregationPrimitive):
method __init__ (line 39) | def __init__(self, skipna=True):
method get_function (line 42) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/num_consecutive_less_mean.py
class NumConsecutiveLessMean (line 8) | class NumConsecutiveLessMean(AggregationPrimitive):
method __init__ (line 39) | def __init__(self, skipna=True):
method get_function (line 42) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/num_false_since_last_true.py
class NumFalseSinceLastTrue (line 8) | class NumFalseSinceLastTrue(AggregationPrimitive):
method get_function (line 31) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/num_peaks.py
class NumPeaks (line 9) | class NumPeaks(AggregationPrimitive):
method get_function (line 28) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/num_true.py
class NumTrue (line 8) | class NumTrue(AggregationPrimitive):
method get_function (line 32) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/num_true_since_last_false.py
class NumTrueSinceLastFalse (line 8) | class NumTrueSinceLastFalse(AggregationPrimitive):
method get_function (line 30) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/num_unique.py
class NumUnique (line 8) | class NumUnique(AggregationPrimitive):
method __init__ (line 34) | def __init__(self, use_string_for_pd_calc=True):
method get_function (line 37) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/num_zero_crossings.py
class NumZeroCrossings (line 8) | class NumZeroCrossings(AggregationPrimitive):
method get_function (line 27) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/percent_true.py
class PercentTrue (line 8) | class PercentTrue(AggregationPrimitive):
method get_function (line 34) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/percent_unique.py
class PercentUnique (line 7) | class PercentUnique(AggregationPrimitive):
method __init__ (line 39) | def __init__(self, skipna=True):
method get_function (line 42) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/skew.py
class Skew (line 7) | class Skew(AggregationPrimitive):
method get_function (line 28) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/std.py
class Std (line 7) | class Std(AggregationPrimitive):
method get_function (line 22) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/sum_primitive.py
class Sum (line 8) | class Sum(AggregationPrimitive):
method get_function (line 25) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/time_since_first.py
class TimeSinceFirst (line 8) | class TimeSinceFirst(AggregationPrimitive):
method __init__ (line 48) | def __init__(self, unit="seconds"):
method get_function (line 51) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/time_since_last.py
class TimeSinceLast (line 8) | class TimeSinceLast(AggregationPrimitive):
method __init__ (line 48) | def __init__(self, unit="seconds"):
method get_function (line 51) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/time_since_last_false.py
class TimeSinceLastFalse (line 9) | class TimeSinceLastFalse(AggregationPrimitive):
method get_function (line 45) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/time_since_last_max.py
class TimeSinceLastMax (line 9) | class TimeSinceLastMax(AggregationPrimitive):
method get_function (line 42) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/time_since_last_min.py
class TimeSinceLastMin (line 9) | class TimeSinceLastMin(AggregationPrimitive):
method get_function (line 42) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/time_since_last_true.py
class TimeSinceLastTrue (line 9) | class TimeSinceLastTrue(AggregationPrimitive):
method get_function (line 45) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/trend.py
class Trend (line 9) | class Trend(AggregationPrimitive):
method get_function (line 37) | def get_function(self):
FILE: featuretools/primitives/standard/aggregation/variance.py
class Variance (line 8) | class Variance(AggregationPrimitive):
method get_function (line 35) | def get_function(self):
FILE: featuretools/primitives/standard/transform/absolute_diff.py
class AbsoluteDiff (line 6) | class AbsoluteDiff(TransformPrimitive):
method __init__ (line 60) | def __init__(self, method="ffill", limit=None):
method get_function (line 66) | def get_function(self):
FILE: featuretools/primitives/standard/transform/binary/add_numeric.py
class AddNumeric (line 7) | class AddNumeric(TransformPrimitive):
method get_function (line 31) | def get_function(self):
method generate_name (line 34) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/add_numeric_scalar.py
class AddNumericScalar (line 6) | class AddNumericScalar(TransformPrimitive):
method __init__ (line 23) | def __init__(self, value=0):
method get_function (line 27) | def get_function(self):
method generate_name (line 33) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/and_primitive.py
class And (line 8) | class And(TransformPrimitive):
method get_function (line 43) | def get_function(self):
method generate_name (line 46) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/divide_by_feature.py
class DivideByFeature (line 6) | class DivideByFeature(TransformPrimitive):
method __init__ (line 24) | def __init__(self, value=1):
method get_function (line 30) | def get_function(self):
method generate_name (line 36) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/divide_numeric.py
class DivideNumeric (line 6) | class DivideNumeric(TransformPrimitive):
method __init__ (line 34) | def __init__(self, commutative=False):
method get_function (line 37) | def get_function(self):
method generate_name (line 43) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/divide_numeric_scalar.py
class DivideNumericScalar (line 6) | class DivideNumericScalar(TransformPrimitive):
method __init__ (line 23) | def __init__(self, value=1):
method get_function (line 29) | def get_function(self):
method generate_name (line 35) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/equal.py
class Equal (line 8) | class Equal(TransformPrimitive):
method get_function (line 29) | def get_function(self):
method generate_name (line 48) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/equal_scalar.py
class EqualScalar (line 7) | class EqualScalar(TransformPrimitive):
method __init__ (line 24) | def __init__(self, value=None):
method get_function (line 28) | def get_function(self):
method generate_name (line 34) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/greater_than.py
class GreaterThan (line 9) | class GreaterThan(TransformPrimitive):
method get_function (line 35) | def get_function(self):
method generate_name (line 50) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/greater_than_equal_to.py
class GreaterThanEqualTo (line 9) | class GreaterThanEqualTo(TransformPrimitive):
method get_function (line 36) | def get_function(self):
method generate_name (line 51) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/greater_than_equal_to_scalar.py
class GreaterThanEqualToScalar (line 7) | class GreaterThanEqualToScalar(TransformPrimitive):
method __init__ (line 25) | def __init__(self, value=0):
method get_function (line 31) | def get_function(self):
method generate_name (line 37) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/greater_than_scalar.py
class GreaterThanScalar (line 7) | class GreaterThanScalar(TransformPrimitive):
method __init__ (line 25) | def __init__(self, value=0):
method get_function (line 29) | def get_function(self):
method generate_name (line 35) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/less_than.py
class LessThan (line 9) | class LessThan(TransformPrimitive):
method get_function (line 36) | def get_function(self):
method generate_name (line 51) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/less_than_equal_to.py
class LessThanEqualTo (line 9) | class LessThanEqualTo(TransformPrimitive):
method get_function (line 36) | def get_function(self):
method generate_name (line 51) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/less_than_equal_to_scalar.py
class LessThanEqualToScalar (line 7) | class LessThanEqualToScalar(TransformPrimitive):
method __init__ (line 25) | def __init__(self, value=0):
method get_function (line 31) | def get_function(self):
method generate_name (line 37) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/less_than_scalar.py
class LessThanScalar (line 7) | class LessThanScalar(TransformPrimitive):
method __init__ (line 25) | def __init__(self, value=0):
method get_function (line 29) | def get_function(self):
method generate_name (line 35) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/modulo_by_feature.py
class ModuloByFeature (line 6) | class ModuloByFeature(TransformPrimitive):
method __init__ (line 24) | def __init__(self, value=1):
method get_function (line 30) | def get_function(self):
method generate_name (line 36) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/modulo_numeric.py
class ModuloNumeric (line 7) | class ModuloNumeric(TransformPrimitive):
method get_function (line 30) | def get_function(self):
method generate_name (line 33) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/modulo_numeric_scalar.py
class ModuloNumericScalar (line 6) | class ModuloNumericScalar(TransformPrimitive):
method __init__ (line 24) | def __init__(self, value=1):
method get_function (line 30) | def get_function(self):
method generate_name (line 36) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/multiply_boolean.py
class MultiplyBoolean (line 8) | class MultiplyBoolean(TransformPrimitive):
method get_function (line 42) | def get_function(self):
method generate_name (line 45) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/multiply_numeric.py
class MultiplyNumeric (line 7) | class MultiplyNumeric(TransformPrimitive):
method get_function (line 31) | def get_function(self):
method generate_name (line 34) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/multiply_numeric_boolean.py
class MultiplyNumericBoolean (line 8) | class MultiplyNumericBoolean(TransformPrimitive):
method get_function (line 50) | def get_function(self):
method generate_name (line 63) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/multiply_numeric_scalar.py
class MultiplyNumericScalar (line 6) | class MultiplyNumericScalar(TransformPrimitive):
method __init__ (line 23) | def __init__(self, value=1):
method get_function (line 27) | def get_function(self):
method generate_name (line 33) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/not_equal.py
class NotEqual (line 8) | class NotEqual(TransformPrimitive):
method get_function (line 28) | def get_function(self):
method generate_name (line 47) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/not_equal_scalar.py
class NotEqualScalar (line 7) | class NotEqualScalar(TransformPrimitive):
method __init__ (line 24) | def __init__(self, value=None):
method get_function (line 28) | def get_function(self):
method generate_name (line 34) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/or_primitive.py
class Or (line 8) | class Or(TransformPrimitive):
method get_function (line 43) | def get_function(self):
method generate_name (line 46) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/scalar_subtract_numeric_feature.py
class ScalarSubtractNumericFeature (line 6) | class ScalarSubtractNumericFeature(TransformPrimitive):
method __init__ (line 24) | def __init__(self, value=0):
method get_function (line 28) | def get_function(self):
method generate_name (line 34) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/subtract_numeric.py
class SubtractNumeric (line 7) | class SubtractNumeric(TransformPrimitive):
method __init__ (line 39) | def __init__(self, commutative=True):
method get_function (line 42) | def get_function(self):
method generate_name (line 45) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/binary/subtract_numeric_scalar.py
class SubtractNumericScalar (line 6) | class SubtractNumericScalar(TransformPrimitive):
method __init__ (line 23) | def __init__(self, value=0):
method get_function (line 27) | def get_function(self):
method generate_name (line 33) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/cumulative/cum_count.py
class CumCount (line 8) | class CumCount(TransformPrimitive):
method get_function (line 32) | def get_function(self):
FILE: featuretools/primitives/standard/transform/cumulative/cum_max.py
class CumMax (line 6) | class CumMax(TransformPrimitive):
method get_function (line 28) | def get_function(self):
FILE: featuretools/primitives/standard/transform/cumulative/cum_mean.py
class CumMean (line 7) | class CumMean(TransformPrimitive):
method get_function (line 29) | def get_function(self):
FILE: featuretools/primitives/standard/transform/cumulative/cum_min.py
class CumMin (line 6) | class CumMin(TransformPrimitive):
method get_function (line 28) | def get_function(self):
FILE: featuretools/primitives/standard/transform/cumulative/cum_sum.py
class CumSum (line 6) | class CumSum(TransformPrimitive):
method get_function (line 28) | def get_function(self):
FILE: featuretools/primitives/standard/transform/cumulative/cumulative_time_since_last_false.py
class CumulativeTimeSinceLastFalse (line 9) | class CumulativeTimeSinceLastFalse(TransformPrimitive):
method get_function (line 39) | def get_function(self):
FILE: featuretools/primitives/standard/transform/cumulative/cumulative_time_since_last_true.py
class CumulativeTimeSinceLastTrue (line 9) | class CumulativeTimeSinceLastTrue(TransformPrimitive):
method get_function (line 34) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/age.py
class Age (line 7) | class Age(TransformPrimitive):
method get_function (line 34) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/date_to_holiday.py
class DateToHoliday (line 9) | class DateToHoliday(TransformPrimitive):
method __init__ (line 45) | def __init__(self, country="US"):
method get_function (line 49) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/date_to_timezone.py
class DateToTimeZone (line 8) | class DateToTimeZone(TransformPrimitive):
method get_function (line 33) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/day.py
class Day (line 7) | class Day(TransformPrimitive):
method get_function (line 29) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/day_of_year.py
class DayOfYear (line 7) | class DayOfYear(TransformPrimitive):
method get_function (line 33) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/days_in_month.py
class DaysInMonth (line 7) | class DaysInMonth(TransformPrimitive):
method get_function (line 29) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/diff_datetime.py
class DiffDatetime (line 7) | class DiffDatetime(Diff):
method __init__ (line 40) | def __init__(self, periods=0):
FILE: featuretools/primitives/standard/transform/datetime/distance_to_holiday.py
class DistanceToHoliday (line 9) | class DistanceToHoliday(TransformPrimitive):
method __init__ (line 55) | def __init__(self, holiday="New Year's Day", country="US"):
method get_function (line 65) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/hour.py
class Hour (line 7) | class Hour(TransformPrimitive):
method get_function (line 29) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/is_federal_holiday.py
class IsFederalHoliday (line 9) | class IsFederalHoliday(TransformPrimitive):
method __init__ (line 34) | def __init__(self, country="US"):
method get_function (line 38) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/is_first_week_of_month.py
class IsFirstWeekOfMonth (line 9) | class IsFirstWeekOfMonth(TransformPrimitive):
method get_function (line 33) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/is_leap_year.py
class IsLeapYear (line 7) | class IsLeapYear(TransformPrimitive):
method get_function (line 26) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/is_lunch_time.py
class IsLunchTime (line 7) | class IsLunchTime(TransformPrimitive):
method __init__ (line 34) | def __init__(self, lunch_hour=12):
method get_function (line 37) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/is_month_end.py
class IsMonthEnd (line 7) | class IsMonthEnd(TransformPrimitive):
method get_function (line 26) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/is_month_start.py
class IsMonthStart (line 7) | class IsMonthStart(TransformPrimitive):
method get_function (line 26) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/is_quarter_end.py
class IsQuarterEnd (line 7) | class IsQuarterEnd(TransformPrimitive):
method get_function (line 25) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/is_quarter_start.py
class IsQuarterStart (line 7) | class IsQuarterStart(TransformPrimitive):
method get_function (line 25) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/is_weekend.py
class IsWeekend (line 7) | class IsWeekend(TransformPrimitive):
method get_function (line 26) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/is_working_hours.py
class IsWorkingHours (line 7) | class IsWorkingHours(TransformPrimitive):
method __init__ (line 35) | def __init__(self, start_hour=8, end_hour=18):
method get_function (line 39) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/is_year_end.py
class IsYearEnd (line 7) | class IsYearEnd(TransformPrimitive):
method get_function (line 28) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/is_year_start.py
class IsYearStart (line 7) | class IsYearStart(TransformPrimitive):
method get_function (line 28) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/minute.py
class Minute (line 7) | class Minute(TransformPrimitive):
method get_function (line 29) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/month.py
class Month (line 7) | class Month(TransformPrimitive):
method get_function (line 29) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/part_of_day.py
class PartOfDay (line 9) | class PartOfDay(TransformPrimitive):
method construct_replacement_dict (line 41) | def construct_replacement_dict():
method get_function (line 62) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/quarter.py
class Quarter (line 7) | class Quarter(TransformPrimitive):
method get_function (line 29) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/season.py
class Season (line 10) | class Season(TransformPrimitive):
method get_function (line 34) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/second.py
class Second (line 7) | class Second(TransformPrimitive):
method get_function (line 29) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/time_since.py
class TimeSince (line 8) | class TimeSince(TransformPrimitive):
method __init__ (line 46) | def __init__(self, unit="seconds"):
method get_function (line 49) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/time_since_previous.py
class TimeSincePrevious (line 8) | class TimeSincePrevious(TransformPrimitive):
method __init__ (line 38) | def __init__(self, unit="seconds"):
method get_function (line 41) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/utils.py
class HolidayUtil (line 7) | class HolidayUtil:
method __init__ (line 8) | def __init__(self, country="US"):
method to_df (line 24) | def to_df(self):
method convert_to_subdivision (line 32) | def convert_to_subdivision(self, country: str) -> Tuple[str, Optional[...
FILE: featuretools/primitives/standard/transform/datetime/week.py
class Week (line 7) | class Week(TransformPrimitive):
method get_function (line 34) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/weekday.py
class Weekday (line 7) | class Weekday(TransformPrimitive):
method get_function (line 33) | def get_function(self):
FILE: featuretools/primitives/standard/transform/datetime/year.py
class Year (line 7) | class Year(TransformPrimitive):
method get_function (line 29) | def get_function(self):
FILE: featuretools/primitives/standard/transform/email/email_address_to_domain.py
class EmailAddressToDomain (line 9) | class EmailAddressToDomain(TransformPrimitive):
method get_function (line 27) | def get_function(self):
FILE: featuretools/primitives/standard/transform/email/is_free_email_domain.py
class IsFreeEmailDomain (line 9) | class IsFreeEmailDomain(TransformPrimitive):
method get_function (line 29) | def get_function(self):
FILE: featuretools/primitives/standard/transform/exponential/exponential_weighted_average.py
class ExponentialWeightedAverage (line 7) | class ExponentialWeightedAverage(TransformPrimitive):
method __init__ (line 48) | def __init__(self, com=None, span=None, halflife=None, alpha=None, ign...
method get_function (line 57) | def get_function(self):
FILE: featuretools/primitives/standard/transform/exponential/exponential_weighted_std.py
class ExponentialWeightedSTD (line 7) | class ExponentialWeightedSTD(TransformPrimitive):
method __init__ (line 50) | def __init__(self, com=None, span=None, halflife=None, alpha=None, ign...
method get_function (line 59) | def get_function(self):
FILE: featuretools/primitives/standard/transform/exponential/exponential_weighted_variance.py
class ExponentialWeightedVariance (line 7) | class ExponentialWeightedVariance(TransformPrimitive):
method __init__ (line 49) | def __init__(self, com=None, span=None, halflife=None, alpha=None, ign...
method get_function (line 58) | def get_function(self):
FILE: featuretools/primitives/standard/transform/file_extension.py
class FileExtension (line 7) | class FileExtension(TransformPrimitive):
method get_function (line 25) | def get_function(self):
FILE: featuretools/primitives/standard/transform/full_name_to_first_name.py
class FullNameToFirstName (line 8) | class FullNameToFirstName(TransformPrimitive):
method get_function (line 32) | def get_function(self):
FILE: featuretools/primitives/standard/transform/full_name_to_last_name.py
class FullNameToLastName (line 8) | class FullNameToLastName(TransformPrimitive):
method get_function (line 29) | def get_function(self):
FILE: featuretools/primitives/standard/transform/full_name_to_title.py
class FullNameToTitle (line 7) | class FullNameToTitle(TransformPrimitive):
method get_function (line 27) | def get_function(self):
FILE: featuretools/primitives/standard/transform/is_in.py
class IsIn (line 7) | class IsIn(TransformPrimitive):
method __init__ (line 21) | def __init__(self, list_of_outputs=None):
method get_function (line 31) | def get_function(self):
method generate_name (line 37) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/is_null.py
class IsNull (line 7) | class IsNull(TransformPrimitive):
method get_function (line 21) | def get_function(self):
FILE: featuretools/primitives/standard/transform/latlong/cityblock_distance.py
class CityblockDistance (line 12) | class CityblockDistance(TransformPrimitive):
method __init__ (line 51) | def __init__(self, unit="miles"):
method get_function (line 56) | def get_function(self):
FILE: featuretools/primitives/standard/transform/latlong/geomidpoint.py
class GeoMidpoint (line 8) | class GeoMidpoint(TransformPrimitive):
method get_function (line 25) | def get_function(self):
FILE: featuretools/primitives/standard/transform/latlong/haversine.py
class Haversine (line 11) | class Haversine(TransformPrimitive):
method __init__ (line 42) | def __init__(self, unit="miles"):
method get_function (line 55) | def get_function(self):
method generate_name (line 69) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/latlong/is_in_geobox.py
class IsInGeoBox (line 8) | class IsInGeoBox(TransformPrimitive):
method __init__ (line 33) | def __init__(self, point1=(0, 0), point2=(0, 0)):
method get_function (line 39) | def get_function(self):
FILE: featuretools/primitives/standard/transform/latlong/latitude.py
class Latitude (line 8) | class Latitude(TransformPrimitive):
method get_function (line 25) | def get_function(self):
FILE: featuretools/primitives/standard/transform/latlong/longitude.py
class Longitude (line 8) | class Longitude(TransformPrimitive):
method get_function (line 25) | def get_function(self):
FILE: featuretools/primitives/standard/transform/latlong/utils.py
function _haversine_calculate (line 4) | def _haversine_calculate(lat_1s, lon_1s, lat_2s, lon_2s, unit):
FILE: featuretools/primitives/standard/transform/natural_language/count_string.py
class CountString (line 10) | class CountString(TransformPrimitive):
method __init__ (line 64) | def __init__(
method process_text (line 93) | def process_text(self, text):
method get_function (line 100) | def get_function(self):
FILE: featuretools/primitives/standard/transform/natural_language/mean_characters_per_word.py
function _mean_characters_per_word (line 18) | def _mean_characters_per_word(value):
class MeanCharactersPerWord (line 30) | class MeanCharactersPerWord(TransformPrimitive):
method get_function (line 52) | def get_function(self):
FILE: featuretools/primitives/standard/transform/natural_language/median_word_length.py
class MedianWordLength (line 11) | class MedianWordLength(TransformPrimitive):
method __init__ (line 37) | def __init__(self, delimiters_regex=DELIMITERS):
method get_function (line 40) | def get_function(self):
FILE: featuretools/primitives/standard/transform/natural_language/num_characters.py
class NumCharacters (line 8) | class NumCharacters(TransformPrimitive):
method get_function (line 28) | def get_function(self):
FILE: featuretools/primitives/standard/transform/natural_language/num_unique_separators.py
class NumUniqueSeparators (line 10) | class NumUniqueSeparators(TransformPrimitive):
method __init__ (line 33) | def __init__(self, separators=NATURAL_LANGUAGE_SEPARATORS):
method get_function (line 37) | def get_function(self):
FILE: featuretools/primitives/standard/transform/natural_language/num_words.py
class NumWords (line 15) | class NumWords(TransformPrimitive):
method get_function (line 34) | def get_function(self):
FILE: featuretools/primitives/standard/transform/natural_language/number_of_common_words.py
class NumberOfCommonWords (line 15) | class NumberOfCommonWords(TransformPrimitive):
method __init__ (line 48) | def __init__(
method get_function (line 56) | def get_function(self):
FILE: featuretools/primitives/standard/transform/natural_language/number_of_hashtags.py
class NumberOfHashtags (line 9) | class NumberOfHashtags(CountString):
method __init__ (line 41) | def __init__(self):
FILE: featuretools/primitives/standard/transform/natural_language/number_of_mentions.py
class NumberOfMentions (line 12) | class NumberOfMentions(CountString):
method __init__ (line 44) | def __init__(self):
FILE: featuretools/primitives/standard/transform/natural_language/number_of_unique_words.py
class NumberOfUniqueWords (line 14) | class NumberOfUniqueWords(TransformPrimitive):
method __init__ (line 44) | def __init__(self, case_insensitive=False):
method get_function (line 47) | def get_function(self):
FILE: featuretools/primitives/standard/transform/natural_language/number_of_words_in_quotes.py
class NumberOfWordsInQuotes (line 14) | class NumberOfWordsInQuotes(TransformPrimitive):
method __init__ (line 44) | def __init__(self, quote_type="both"):
method get_function (line 59) | def get_function(self):
FILE: featuretools/primitives/standard/transform/natural_language/punctuation_count.py
class PunctuationCount (line 14) | class PunctuationCount(CountString):
method __init__ (line 37) | def __init__(self):
FILE: featuretools/primitives/standard/transform/natural_language/title_word_count.py
class TitleWordCount (line 10) | class TitleWordCount(CountString):
method __init__ (line 33) | def __init__(self):
FILE: featuretools/primitives/standard/transform/natural_language/total_word_length.py
class TotalWordLength (line 10) | class TotalWordLength(TransformPrimitive):
method __init__ (line 36) | def __init__(self, do_not_count=PUNCTUATION_AND_WHITESPACE):
method get_function (line 39) | def get_function(self):
FILE: featuretools/primitives/standard/transform/natural_language/upper_case_count.py
class UpperCaseCount (line 10) | class UpperCaseCount(CountString):
method __init__ (line 32) | def __init__(self):
FILE: featuretools/primitives/standard/transform/natural_language/upper_case_word_count.py
class UpperCaseWordCount (line 14) | class UpperCaseWordCount(TransformPrimitive):
method get_function (line 35) | def get_function(self):
FILE: featuretools/primitives/standard/transform/natural_language/whitespace_count.py
class WhitespaceCount (line 6) | class WhitespaceCount(CountString):
method __init__ (line 23) | def __init__(self):
FILE: featuretools/primitives/standard/transform/not_primitive.py
class Not (line 8) | class Not(TransformPrimitive):
method generate_name (line 25) | def generate_name(self, base_feature_names):
method get_function (line 28) | def get_function(self):
FILE: featuretools/primitives/standard/transform/nth_week_of_month.py
class NthWeekOfMonth (line 9) | class NthWeekOfMonth(TransformPrimitive):
method get_function (line 33) | def get_function(self):
FILE: featuretools/primitives/standard/transform/numeric/absolute.py
class Absolute (line 7) | class Absolute(TransformPrimitive):
method get_function (line 22) | def get_function(self):
FILE: featuretools/primitives/standard/transform/numeric/cosine.py
class Cosine (line 8) | class Cosine(TransformPrimitive):
method get_function (line 23) | def get_function(self):
FILE: featuretools/primitives/standard/transform/numeric/diff.py
class Diff (line 6) | class Diff(TransformPrimitive):
method __init__ (line 39) | def __init__(self, periods=0):
method get_function (line 42) | def get_function(self):
FILE: featuretools/primitives/standard/transform/numeric/natural_logarithm.py
class NaturalLogarithm (line 8) | class NaturalLogarithm(TransformPrimitive):
method get_function (line 25) | def get_function(self):
FILE: featuretools/primitives/standard/transform/numeric/negate.py
class Negate (line 6) | class Negate(TransformPrimitive):
method get_function (line 20) | def get_function(self):
method generate_name (line 26) | def generate_name(self, base_feature_names):
FILE: featuretools/primitives/standard/transform/numeric/percentile.py
class Percentile (line 6) | class Percentile(TransformPrimitive):
method get_function (line 26) | def get_function(self):
FILE: featuretools/primitives/standard/transform/numeric/rate_of_change.py
class RateOfChange (line 7) | class RateOfChange(TransformPrimitive):
method get_function (line 29) | def get_function(self):
FILE: featuretools/primitives/standard/transform/numeric/same_as_previous.py
class SameAsPrevious (line 7) | class SameAsPrevious(TransformPrimitive):
method __init__ (line 54) | def __init__(self, fill_method="pad", limit=None):
method get_function (line 60) | def get_function(self):
FILE: featuretools/primitives/standard/transform/numeric/sine.py
class Sine (line 8) | class Sine(TransformPrimitive):
method get_function (line 23) | def get_function(self):
FILE: featuretools/primitives/standard/transform/numeric/square_root.py
class SquareRoot (line 8) | class SquareRoot(TransformPrimitive):
method get_function (line 23) | def get_function(self):
FILE: featuretools/primitives/standard/transform/numeric/tangent.py
class Tangent (line 8) | class Tangent(TransformPrimitive):
method get_function (line 23) | def get_function(self):
FILE: featuretools/primitives/standard/transform/percent_change.py
class PercentChange (line 7) | class PercentChange(TransformPrimitive):
method __init__ (line 90) | def __init__(self, periods=1, fill_method="pad", limit=None, freq=None):
method get_function (line 98) | def get_function(self):
FILE: featuretools/primitives/standard/transform/postal/one_digit_postal_code.py
class OneDigitPostalCode (line 8) | class OneDigitPostalCode(TransformPrimitive):
method get_function (line 25) | def get_function(self):
FILE: featuretools/primitives/standard/transform/postal/two_digit_postal_code.py
class TwoDigitPostalCode (line 8) | class TwoDigitPostalCode(TransformPrimitive):
method get_function (line 26) | def get_function(self):
FILE: featuretools/primitives/standard/transform/savgol_filter.py
class SavgolFilter (line 11) | class SavgolFilter(TransformPrimitive):
method __init__ (line 82) | def __init__(
method get_function (line 116) | def get_function(self):
FILE: featuretools/primitives/standard/transform/time_series/expanding/expanding_count.py
class ExpandingCount (line 11) | class ExpandingCount(TransformPrimitive):
method __init__ (line 58) | def __init__(self, gap=1, min_periods=1):
method get_function (line 62) | def get_function(self):
FILE: featuretools/primitives/standard/transform/time_series/expanding/expanding_max.py
class ExpandingMax (line 11) | class ExpandingMax(TransformPrimitive):
method __init__ (line 61) | def __init__(self, gap=1, min_periods=1):
method get_function (line 65) | def get_function(self):
FILE: featuretools/primitives/standard/transform/time_series/expanding/expanding_mean.py
class ExpandingMean (line 11) | class ExpandingMean(TransformPrimitive):
method __init__ (line 61) | def __init__(self, gap=1, min_periods=1):
method get_function (line 65) | def get_function(self):
FILE: featuretools/primitives/standard/transform/time_series/expanding/expanding_min.py
class ExpandingMin (line 11) | class ExpandingMin(TransformPrimitive):
method __init__ (line 60) | def __init__(self, gap=1, min_periods=1):
method get_function (line 64) | def get_function(self):
FILE: featuretools/primitives/standard/transform/time_series/expanding/expanding_std.py
class ExpandingSTD (line 11) | class ExpandingSTD(TransformPrimitive):
method __init__ (line 64) | def __init__(self, gap=1, min_periods=1):
method get_function (line 68) | def get_function(self):
FILE: featuretools/primitives/standard/transform/time_series/expanding/expanding_trend.py
class ExpandingTrend (line 12) | class ExpandingTrend(TransformPrimitive):
method __init__ (line 65) | def __init__(self, gap=1, min_periods=1):
method get_function (line 69) | def get_function(self):
FILE: featuretools/primitives/standard/transform/time_series/lag.py
class Lag (line 8) | class Lag(TransformPrimitive):
method __init__ (line 53) | def __init__(self, periods=1):
method get_function (line 56) | def get_function(self):
FILE: featuretools/primitives/standard/transform/time_series/numeric_lag.py
class NumericLag (line 9) | class NumericLag(TransformPrimitive):
method __init__ (line 45) | def __init__(self, periods=1, fill_value=None):
method get_function (line 53) | def get_function(self):
FILE: featuretools/primitives/standard/transform/time_series/rolling_count.py
class RollingCount (line 11) | class RollingCount(TransformPrimitive):
method __init__ (line 92) | def __init__(self, window_length=3, gap=1, min_periods=0):
method get_function (line 97) | def get_function(self):
FILE: featuretools/primitives/standard/transform/time_series/rolling_max.py
class RollingMax (line 11) | class RollingMax(TransformPrimitive):
method __init__ (line 96) | def __init__(self, window_length=3, gap=1, min_periods=1):
method get_function (line 101) | def get_function(self):
FILE: featuretools/primitives/standard/transform/time_series/rolling_mean.py
class RollingMean (line 12) | class RollingMean(TransformPrimitive):
method __init__ (line 89) | def __init__(self, window_length=3, gap=1, min_periods=0):
method get_function (line 94) | def get_function(self):
FILE: featuretools/primitives/standard/transform/time_series/rolling_min.py
class RollingMin (line 11) | class RollingMin(TransformPrimitive):
method __init__ (line 95) | def __init__(self, window_length=3, gap=1, min_periods=1):
method get_function (line 100) | def get_function(self):
FILE: featuretools/primitives/standard/transform/time_series/rolling_outlier_count.py
class RollingOutlierCount (line 13) | class RollingOutlierCount(TransformPrimitive):
method __init__ (line 95) | def __init__(self, window_length=3, gap=1, min_periods=0):
method get_outliers_count (line 100) | def get_outliers_count(self, numeric_series):
method get_function (line 110) | def get_function(self):
FILE: featuretools/primitives/standard/transform/time_series/rolling_std.py
class RollingSTD (line 11) | class RollingSTD(TransformPrimitive):
method __init__ (line 94) | def __init__(self, window_length=3, gap=1, min_periods=1):
method get_function (line 99) | def get_function(self):
FILE: featuretools/primitives/standard/transform/time_series/rolling_trend.py
class RollingTrend (line 12) | class RollingTrend(TransformPrimitive):
method __init__ (line 78) | def __init__(self, window_length=3, gap=1, min_periods=0):
method get_function (line 83) | def get_function(self):
FILE: featuretools/primitives/standard/transform/time_series/utils.py
function roll_series_with_gap (line 10) | def roll_series_with_gap(
function _get_rolled_series_without_gap (line 61) | def _get_rolled_series_without_gap(window: Series, gap_offset: str) -> S...
function apply_roll_with_offset_gap (line 89) | def apply_roll_with_offset_gap(
function _check_window_length (line 121) | def _check_window_length(window_length: Union[int, str]) -> None:
function _check_gap (line 138) | def _check_gap(window_length: Union[int, str], gap: Union[int, str]) -> ...
function apply_rolling_agg_to_series (line 161) | def apply_rolling_agg_to_series(
function _apply_gap_for_expanding_primitives (line 243) | def _apply_gap_for_expanding_primitives(
FILE: featuretools/primitives/standard/transform/url/url_to_domain.py
class URLToDomain (line 7) | class URLToDomain(TransformPrimitive):
method get_function (line 27) | def get_function(self):
FILE: featuretools/primitives/standard/transform/url/url_to_protocol.py
class URLToProtocol (line 7) | class URLToProtocol(TransformPrimitive):
method get_function (line 28) | def get_function(self):
FILE: featuretools/primitives/standard/transform/url/url_to_tld.py
class URLToTLD (line 8) | class URLToTLD(TransformPrimitive):
method get_function (line 30) | def get_function(self):
FILE: featuretools/primitives/utils.py
function _get_primitives (line 21) | def _get_primitives(primitive_kind):
function get_aggregation_primitives (line 34) | def get_aggregation_primitives():
function get_transform_primitives (line 39) | def get_transform_primitives():
function get_all_primitives (line 44) | def get_all_primitives():
function _get_natural_language_primitives (line 55) | def _get_natural_language_primitives():
function list_primitives (line 79) | def list_primitives():
function summarize_primitives (line 117) | def summarize_primitives() -> pd.DataFrame:
function get_default_aggregation_primitives (line 162) | def get_default_aggregation_primitives():
function get_default_transform_primitives (line 178) | def get_default_transform_primitives():
function _get_descriptions (line 193) | def _get_descriptions(primitives):
function _get_summary_primitives (line 206) | def _get_summary_primitives(primitives: List) -> Dict[str, int]:
function _check_input_types (line 271) | def _check_input_types(
function _get_names_primitives (line 287) | def _get_names_primitives(primitive_func):
function _get_unique_input_types (line 303) | def _get_unique_input_types(input_types):
function list_primitive_files (line 313) | def list_primitive_files(directory):
function check_valid_primitive_path (line 324) | def check_valid_primitive_path(path):
function load_primitive_from_file (line 336) | def load_primitive_from_file(filepath):
function serialize_primitive (line 362) | def serialize_primitive(primitive: PrimitiveBase):
class PrimitivesDeserializer (line 375) | class PrimitivesDeserializer(object):
method __init__ (line 384) | def __init__(self):
method deserialize_primitive (line 391) | def deserialize_primitive(self, primitive_dict):
method _find_class_in_descendants (line 418) | def _find_class_in_descendants(self, search_key):
function get_all_logical_type_names (line 427) | def get_all_logical_type_names():
FILE: featuretools/selection/selection.py
function remove_low_information_features (line 5) | def remove_low_information_features(feature_matrix, features=None):
function remove_highly_null_features (line 31) | def remove_highly_null_features(feature_matrix, features=None, pct_null_...
function remove_single_value_features (line 70) | def remove_single_value_features(
function remove_highly_correlated_features (line 102) | def remove_highly_correlated_features(
function _apply_feature_selection (line 203) | def _apply_feature_selection(keep, feature_matrix, features=None):
FILE: featuretools/synthesis/deep_feature_synthesis.py
class DeepFeatureSynthesis (line 40) | class DeepFeatureSynthesis(object):
method __init__ (line 132) | def __init__(
method build_features (line 283) | def build_features(self, return_types=None, verbose=False):
method _filter_features (line 363) | def _filter_features(self, features):
method _run_dfs (line 382) | def _run_dfs(self, dataframe, relationship_path, all_features, max_dep...
method _handle_new_feature (line 533) | def _handle_new_feature(self, new_feature, all_features):
method _add_identity_features (line 566) | def _add_identity_features(self, all_features, dataframe):
method _build_where_clauses (line 588) | def _build_where_clauses(self, all_features, dataframe):
method _build_transform_features (line 629) | def _build_transform_features(
method _build_forward_features (line 768) | def _build_forward_features(self, all_features, relationship_path, max...
method _build_agg_features (line 796) | def _build_agg_features(
method _features_by_type (line 895) | def _features_by_type(
method _feature_in_relationship_path (line 970) | def _feature_in_relationship_path(self, relationship_path, feature):
method _get_matching_inputs (line 990) | def _get_matching_inputs(
function _match_contains_numeric_foreign_key (line 1055) | def _match_contains_numeric_foreign_key(match):
function not_a_transform_input (line 1060) | def not_a_transform_input(feature):
function _find_root_primitive (line 1069) | def _find_root_primitive(feature):
function _check_if_stacking_is_prohibited (line 1079) | def _check_if_stacking_is_prohibited(
function _check_if_stacking_is_permitted (line 1104) | def _check_if_stacking_is_permitted(
function can_stack_primitive_on_inputs (line 1124) | def can_stack_primitive_on_inputs(primitive: PrimitiveBase, inputs: List...
function match_by_schema (line 1171) | def match_by_schema(features, column_schema):
function match (line 1175) | def match(
function handle_primitive (line 1233) | def handle_primitive(primitive):
function check_primitive (line 1240) | def check_primitive(
function _all_direct_and_same_path (line 1284) | def _all_direct_and_same_path(input_features: List[FeatureBase]) -> bool:
function _build_ignore_columns (line 1295) | def _build_ignore_columns(input_dict: Dict[str, List[str]]) -> DefaultDi...
function _direct_of_dataframe (line 1311) | def _direct_of_dataframe(feature, parent_dataframe):
function get_feature_depth (line 1318) | def get_feature_depth(feature, stop_at=None):
FILE: featuretools/synthesis/dfs.py
function dfs (line 12) | def dfs(
function warn_unused_primitives (line 302) | def warn_unused_primitives(unused_primitives):
FILE: featuretools/synthesis/encode_features.py
function encode_features (line 13) | def encode_features(
FILE: featuretools/synthesis/get_valid_primitives.py
function get_valid_primitives (line 10) | def get_valid_primitives(
FILE: featuretools/synthesis/utils.py
function _categorize_features (line 10) | def _categorize_features(features):
function get_unused_primitives (line 52) | def get_unused_primitives(specified, used):
FILE: featuretools/tests/computational_backend/test_calculate_feature_matrix.py
function test_scatter_warning (line 66) | def test_scatter_warning(caplog):
function test_calc_feature_matrix (line 76) | def test_calc_feature_matrix(es):
function test_cfm_compose (line 165) | def test_cfm_compose(es, lt):
function test_cfm_compose_approximate (line 180) | def test_cfm_compose_approximate(es, lt):
function test_cfm_approximate_correct_ordering (line 197) | def test_cfm_approximate_correct_ordering():
function test_cfm_no_cutoff_time_index (line 260) | def test_cfm_no_cutoff_time_index(es):
function test_cfm_duplicated_index_in_cutoff_time (line 305) | def test_cfm_duplicated_index_in_cutoff_time(es):
function test_saveprogress (line 326) | def test_saveprogress(es, tmp_path):
function test_cutoff_time_correctly (line 365) | def test_cutoff_time_correctly(es):
function test_cutoff_time_binning (line 382) | def test_cutoff_time_binning():
function test_cutoff_time_columns_order (line 417) | def test_cutoff_time_columns_order(es):
function test_cutoff_time_df_redundant_column_names (line 446) | def test_cutoff_time_df_redundant_column_names(es):
function test_training_window (line 485) | def test_training_window(es):
function test_training_window_overlap (line 585) | def test_training_window_overlap(es):
function test_include_cutoff_time_without_training_window (line 626) | def test_include_cutoff_time_without_training_window(es):
function test_approximate_dfeat_of_agg_on_target_include_cutoff_time (line 689) | def test_approximate_dfeat_of_agg_on_target_include_cutoff_time(es):
function test_training_window_recent_time_index (line 730) | def test_training_window_recent_time_index(es):
function test_approximate_multiple_instances_per_cutoff_time (line 818) | def test_approximate_multiple_instances_per_cutoff_time(es):
function test_approximate_with_multiple_paths (line 838) | def test_approximate_with_multiple_paths(diamond_es):
function test_approximate_dfeat_of_agg_on_target (line 859) | def test_approximate_dfeat_of_agg_on_target(es):
function test_approximate_dfeat_of_need_all_values (line 879) | def test_approximate_dfeat_of_need_all_values(es):
function test_uses_full_dataframe_feat_of_approximate (line 929) | def test_uses_full_dataframe_feat_of_approximate(es):
function test_approximate_dfeat_of_dfeat_of_agg_on_target (line 992) | def test_approximate_dfeat_of_dfeat_of_agg_on_target(es):
function test_empty_path_approximate_full (line 1011) | def test_empty_path_approximate_full(es):
function test_approx_base_feature_is_also_first_class_feature (line 1040) | def test_approx_base_feature_is_also_first_class_feature(es):
function test_approximate_time_split_returns_the_same_result (line 1066) | def test_approximate_time_split_returns_the_same_result(es):
function test_approximate_returns_correct_empty_default_values (line 1114) | def test_approximate_returns_correct_empty_default_values(es):
function test_approximate_child_aggs_handled_correctly (line 1141) | def test_approximate_child_aggs_handled_correctly(es):
function test_cutoff_time_naming (line 1179) | def test_cutoff_time_naming(es):
function test_cutoff_time_extra_columns (line 1218) | def test_cutoff_time_extra_columns(es):
function test_cutoff_time_extra_columns_approximate (line 1245) | def test_cutoff_time_extra_columns_approximate(es):
function test_cutoff_time_extra_columns_same_name (line 1277) | def test_cutoff_time_extra_columns_same_name(es):
function test_cutoff_time_extra_columns_same_name_approximate (line 1305) | def test_cutoff_time_extra_columns_same_name_approximate(es):
function test_instances_after_cutoff_time_removed (line 1338) | def test_instances_after_cutoff_time_removed(es):
function test_instances_with_id_kept_after_cutoff (line 1361) | def test_instances_with_id_kept_after_cutoff(es):
function test_cfm_returns_original_time_indexes (line 1386) | def test_cfm_returns_original_time_indexes(es):
function test_cfm_returns_original_time_indexes_approximate (line 1418) | def test_cfm_returns_original_time_indexes_approximate(es):
function test_dask_kwargs (line 1493) | def test_dask_kwargs(es, dask_cluster):
function test_dask_persisted_es (line 1520) | def test_dask_persisted_es(es, capsys, dask_cluster):
class TestCreateClientAndCluster (line 1558) | class TestCreateClientAndCluster(object):
method test_user_cluster_as_string (line 1559) | def test_user_cluster_as_string(self, monkeypatch):
method test_cluster_creation (line 1569) | def test_cluster_creation(self, monkeypatch):
method test_not_enough_memory (line 1609) | def test_not_enough_memory(self, monkeypatch):
function test_parallel_failure_raises_correct_error (line 1628) | def test_parallel_failure_raises_correct_error(es):
function test_warning_not_enough_chunks (line 1653) | def test_warning_not_enough_chunks(
function test_n_jobs (line 1674) | def test_n_jobs():
function test_parallel_cutoff_time_column_pass_through (line 1692) | def test_parallel_cutoff_time_column_pass_through(es, dask_cluster):
function test_integer_time_index (line 1722) | def test_integer_time_index(int_es):
function test_integer_time_index_single_cutoff_value (line 1741) | def test_integer_time_index_single_cutoff_value(int_es):
function test_integer_time_index_datetime_cutoffs (line 1758) | def test_integer_time_index_datetime_cutoffs(int_es):
function test_integer_time_index_passes_extra_columns (line 1775) | def test_integer_time_index_passes_extra_columns(int_es):
function test_integer_time_index_mixed_cutoff (line 1794) | def test_integer_time_index_mixed_cutoff(int_es):
function test_datetime_index_mixed_cutoff (line 1826) | def test_datetime_index_mixed_cutoff(es):
function test_no_data_for_cutoff_time (line 1858) | def test_no_data_for_cutoff_time(mock_customer):
function test_instances_not_in_data (line 1900) | def test_instances_not_in_data(es):
function test_some_instances_not_in_data (line 1928) | def test_some_instances_not_in_data(es):
function test_missing_instances_with_categorical_index (line 1969) | def test_missing_instances_with_categorical_index(es):
function test_handle_chunk_size (line 1986) | def test_handle_chunk_size():
function test_chunk_dataframe_groups (line 2010) | def test_chunk_dataframe_groups():
function test_calls_progress_callback (line 2029) | def test_calls_progress_callback(mock_customer):
function test_calls_progress_callback_cluster (line 2098) | def test_calls_progress_callback_cluster(mock_customer, dask_cluster):
function test_closes_tqdm (line 2136) | def test_closes_tqdm(es):
function test_approximate_with_single_cutoff_warns (line 2164) | def test_approximate_with_single_cutoff_warns(es):
function test_calc_feature_matrix_with_cutoff_df_and_instance_ids (line 2201) | def test_calc_feature_matrix_with_cutoff_df_and_instance_ids(es):
function test_calculate_feature_matrix_returns_default_values (line 2229) | def test_calculate_feature_matrix_returns_default_values(default_value_es):
function test_dataframes_relationships (line 2247) | def test_dataframes_relationships(dataframes, relationships):
function test_no_dataframes (line 2263) | def test_no_dataframes(dataframes, relationships):
function test_no_relationships (line 2276) | def test_no_relationships(dataframes):
function test_cfm_with_invalid_time_index (line 2292) | def test_cfm_with_invalid_time_index(es):
function test_cfm_introduces_nan_values_in_direct_feats (line 2301) | def test_cfm_introduces_nan_values_in_direct_feats(es):
function test_feature_origins_present_on_all_fm_cols (line 2325) | def test_feature_origins_present_on_all_fm_cols(es):
function test_renamed_features_have_expected_column_names_in_feature_matrix (line 2349) | def test_renamed_features_have_expected_column_names_in_feature_matrix(es):
FILE: featuretools/tests/computational_backend/test_feature_set.py
function test_feature_trie_without_needs_full_dataframe (line 14) | def test_feature_trie_without_needs_full_dataframe(diamond_es):
function test_feature_trie_with_needs_full_dataframe (line 81) | def test_feature_trie_with_needs_full_dataframe(diamond_es):
function test_feature_trie_with_needs_full_dataframe_direct (line 129) | def test_feature_trie_with_needs_full_dataframe_direct(es):
function test_feature_trie_ignores_approximate_features (line 165) | def test_feature_trie_ignores_approximate_features(es):
FILE: featuretools/tests/computational_backend/test_feature_set_calculator.py
function test_make_identity (line 48) | def test_make_identity(es):
function test_make_dfeat (line 59) | def test_make_dfeat(es):
function test_make_agg_feat_of_identity_column (line 73) | def test_make_agg_feat_of_identity_column(es):
function test_full_dataframe_trans_of_agg (line 88) | def test_full_dataframe_trans_of_agg(es):
function test_make_agg_feat_of_identity_index_column (line 104) | def test_make_agg_feat_of_identity_index_column(es):
function test_make_agg_feat_where_count (line 119) | def test_make_agg_feat_where_count(es):
function test_make_agg_feat_using_prev_time (line 135) | def test_make_agg_feat_using_prev_time(es):
function test_make_agg_feat_using_prev_n_events (line 165) | def test_make_agg_feat_using_prev_n_events(es):
function test_make_agg_feat_multiple_dtypes (line 211) | def test_make_agg_feat_multiple_dtypes(es):
function test_make_agg_feat_where_different_identity_feat (line 238) | def test_make_agg_feat_where_different_identity_feat(es):
function test_make_agg_feat_of_grandchild_dataframe (line 303) | def test_make_agg_feat_of_grandchild_dataframe(es):
function test_make_agg_feat_where_count_feat (line 317) | def test_make_agg_feat_where_count_feat(es):
function test_make_compare_feat (line 347) | def test_make_compare_feat(es):
function test_make_agg_feat_where_count_and_device_type_feat (line 381) | def test_make_agg_feat_where_count_and_device_type_feat(es):
function test_make_agg_feat_where_count_or_device_type_feat (line 412) | def test_make_agg_feat_where_count_or_device_type_feat(es):
function test_make_agg_feat_of_agg_feat (line 443) | def test_make_agg_feat_of_agg_feat(es):
function df (line 464) | def df():
function test_make_3_stacked_agg_feats (line 476) | def test_make_3_stacked_agg_feats(df):
function test_make_dfeat_of_agg_feat_on_self (line 524) | def test_make_dfeat_of_agg_feat_on_self(es):
function test_make_dfeat_of_agg_feat_through_parent (line 554) | def test_make_dfeat_of_agg_feat_through_parent(es):
function test_make_deep_agg_feat_of_dfeat_of_agg_feat (line 583) | def test_make_deep_agg_feat_of_dfeat_of_agg_feat(es):
function test_deep_agg_feat_chain (line 617) | def test_deep_agg_feat_chain(es):
function test_topn (line 642) | def test_topn(es):
function test_trend (line 672) | def test_trend(es):
function test_direct_squared (line 692) | def test_direct_squared(es):
function test_agg_empty_child (line 702) | def test_agg_empty_child(es):
function test_diamond_entityset (line 721) | def test_diamond_entityset(diamond_es):
function test_two_relationships_to_single_dataframe (line 752) | def test_two_relationships_to_single_dataframe(games_es):
function parent_child (line 785) | def parent_child():
function test_empty_child_dataframe (line 799) | def test_empty_child_dataframe(parent_child):
function test_with_features_built_from_es_metadata (line 915) | def test_with_features_built_from_es_metadata(es):
function test_handles_primitive_function_name_uniqueness (line 931) | def test_handles_primitive_function_name_uniqueness(es):
function test_returns_order_of_instance_ids (line 1054) | def test_returns_order_of_instance_ids(es):
function test_calls_progress_callback (line 1066) | def test_calls_progress_callback(es):
function test_precalculated_features (line 1130) | def test_precalculated_features(es):
function test_nunique_nested_with_agg_bug (line 1185) | def test_nunique_nested_with_agg_bug(es):
FILE: featuretools/tests/computational_backend/test_utils.py
function test_replace_inf_values (line 8) | def test_replace_inf_values(divide_by_zero_es):
function test_replace_inf_values_specify_cols (line 35) | def test_replace_inf_values_specify_cols(divide_by_zero_es):
FILE: featuretools/tests/config_tests/test_config.py
function test_get_default_config_does_not_change (line 4) | def test_get_default_config_does_not_change():
function test_set_and_get_config (line 17) | def test_set_and_get_config():
function test_get_all (line 28) | def test_get_all():
FILE: featuretools/tests/conftest.py
function dask_cluster (line 18) | def dask_cluster():
function three_worker_dask_cluster (line 29) | def three_worker_dask_cluster():
function make_es (line 40) | def make_es():
function make_int_es (line 45) | def make_int_es():
function es (line 50) | def es(make_es):
function int_es (line 55) | def int_es(make_int_es):
function latlong_df (line 60) | def latlong_df():
function diamond_es (line 66) | def diamond_es():
function default_value_es (line 120) | def default_value_es():
function home_games_es (line 136) | def home_games_es():
function games_es (line 153) | def games_es(home_games_es):
function mock_customer (line 158) | def mock_customer():
function lt (line 163) | def lt(es):
function dataframes (line 186) | def dataframes():
function relationships (line 204) | def relationships():
function transform_es (line 209) | def transform_es():
function divide_by_zero_es (line 233) | def divide_by_zero_es():
function window_series (line 245) | def window_series():
function window_date_range (line 253) | def window_date_range():
function rolling_outlier_series (line 258) | def rolling_outlier_series():
function postal_code_dataframe (line 266) | def postal_code_dataframe():
function create_test_credentials (line 284) | def create_test_credentials(test_path):
function create_test_config (line 291) | def create_test_config(test_path_config):
function setup_test_profile (line 299) | def setup_test_profile(monkeypatch, tmp_path):
function test_aggregation_primitive (line 322) | def test_aggregation_primitive():
function test_transform_primitive (line 333) | def test_transform_primitive():
function strings_that_have_triggered_errors_before (line 344) | def strings_that_have_triggered_errors_before():
FILE: featuretools/tests/demo_tests/test_demo_data.py
function set_testing_headers (line 11) | def set_testing_headers():
function test_load_retail_diff (line 17) | def test_load_retail_diff():
function test_mock_customer (line 27) | def test_mock_customer():
function test_load_flight (line 50) | def test_load_flight():
function test_weather (line 64) | def test_weather():
FILE: featuretools/tests/entityset_tests/test_es.py
function test_normalize_time_index_as_additional_column (line 34) | def test_normalize_time_index_as_additional_column(es):
function test_normalize_time_index_as_copy_column (line 48) | def test_normalize_time_index_as_copy_column(es):
function test_normalize_time_index_as_copy_column_new_time_index (line 65) | def test_normalize_time_index_as_copy_column_new_time_index(es):
function test_normalize_time_index_as_copy_column_no_time_index (line 83) | def test_normalize_time_index_as_copy_column_no_time_index(es):
function test_cannot_re_add_relationships_that_already_exists (line 100) | def test_cannot_re_add_relationships_that_already_exists(es):
function test_add_relationships_convert_type (line 117) | def test_add_relationships_convert_type(es):
function test_add_relationship_diff_param_logical_types (line 128) | def test_add_relationship_diff_param_logical_types(es):
function test_add_relationship_different_logical_types_same_dtype (line 178) | def test_add_relationship_different_logical_types_same_dtype(es):
function test_add_relationship_different_compatible_dtypes (line 222) | def test_add_relationship_different_compatible_dtypes(es):
function test_add_relationship_errors_child_v_index (line 265) | def test_add_relationship_errors_child_v_index(es):
function test_add_relationship_empty_child_convert_dtype (line 275) | def test_add_relationship_empty_child_convert_dtype(es):
function test_add_relationship_with_relationship_object (line 292) | def test_add_relationship_with_relationship_object(es):
function test_add_relationships_with_relationship_object (line 298) | def test_add_relationships_with_relationship_object(es):
function test_add_relationship_error (line 304) | def test_add_relationship_error(es):
function test_query_by_values_returns_rows_in_given_order (line 313) | def test_query_by_values_returns_rows_in_given_order():
function test_query_by_values_secondary_time_index (line 334) | def test_query_by_values_secondary_time_index(es):
function test_query_by_id (line 344) | def test_query_by_id(es):
function test_query_by_single_value (line 349) | def test_query_by_single_value(es):
function test_query_by_df (line 354) | def test_query_by_df(es):
function test_query_by_id_with_time (line 361) | def test_query_by_id_with_time(es):
function test_query_by_column_with_time (line 371) | def test_query_by_column_with_time(es):
function test_query_by_column_with_no_lti_and_training_window (line 385) | def test_query_by_column_with_no_lti_and_training_window(es):
function test_query_by_column_with_lti_and_training_window (line 402) | def test_query_by_column_with_lti_and_training_window(es):
function test_query_by_indexed_column (line 416) | def test_query_by_indexed_column(es):
function df (line 427) | def df():
function test_check_columns_and_dataframe (line 431) | def test_check_columns_and_dataframe(df):
function test_make_index_any_location (line 450) | def test_make_index_any_location(df):
function test_replace_dataframe_and_create_index (line 465) | def test_replace_dataframe_and_create_index(es):
function test_replace_dataframe_created_index_present (line 492) | def test_replace_dataframe_created_index_present(es):
function test_index_any_location (line 516) | def test_index_any_location(df):
function test_extra_column_type (line 530) | def test_extra_column_type(df):
function test_add_parent_not_index_column (line 547) | def test_add_parent_not_index_column(es):
function df2 (line 554) | def df2():
function test_none_index (line 558) | def test_none_index(df2):
function df3 (line 582) | def df3():
function test_unknown_index (line 586) | def test_unknown_index(df3):
function test_doesnt_remake_index (line 602) | def test_doesnt_remake_index(df):
function test_bad_time_index_column (line 616) | def test_bad_time_index_column(df3):
function df4 (line 631) | def df4():
function test_converts_dtype_on_init (line 645) | def test_converts_dtype_on_init(df4):
function test_converts_dtype_after_init (line 660) | def test_converts_dtype_after_init(df4):
function datetime1 (line 692) | def datetime1():
function test_converts_datetime (line 698) | def test_converts_datetime(datetime1):
function datetime2 (line 718) | def datetime2():
function test_handles_datetime_format (line 727) | def test_handles_datetime_format(datetime2):
function test_handles_datetime_mismatch (line 756) | def test_handles_datetime_mismatch():
function test_dataframe_init (line 773) | def test_dataframe_init(es):
function bad_df (line 803) | def bad_df():
function test_nonstr_column_names (line 807) | def test_nonstr_column_names(bad_df):
function test_sort_time_id (line 818) | def test_sort_time_id():
function test_already_sorted_parameter (line 837) | def test_already_sorted_parameter():
function test_concat_not_inplace (line 866) | def test_concat_not_inplace(es):
function test_concat_inplace (line 884) | def test_concat_inplace(es):
function test_concat_with_lti (line 900) | def test_concat_with_lti(es):
function test_concat_errors (line 933) | def test_concat_errors(es):
function test_concat_sort_index_with_time_index (line 945) | def test_concat_sort_index_with_time_index(es):
function test_concat_sort_index_without_time_index (line 970) | def test_concat_sort_index_without_time_index(es):
function test_concat_with_make_index (line 1011) | def test_concat_with_make_index(es):
function transactions_df (line 1057) | def transactions_df():
function test_set_time_type_on_init (line 1068) | def test_set_time_type_on_init(transactions_df):
function test_sets_time_when_adding_dataframe (line 1088) | def test_sets_time_when_adding_dataframe(transactions_df):
function test_secondary_time_index_no_primary_time_index (line 1144) | def test_secondary_time_index_no_primary_time_index(es):
function test_set_non_valid_time_index_type (line 1158) | def test_set_non_valid_time_index_type(es):
function test_checks_time_type_setting_secondary_time_index (line 1164) | def test_checks_time_type_setting_secondary_time_index(es):
function test_normalize_dataframe (line 1246) | def test_normalize_dataframe(es):
function test_normalize_dataframe_add_index_as_column (line 1283) | def test_normalize_dataframe_add_index_as_column(es):
function test_normalize_dataframe_new_time_index_in_base_dataframe_error_check (line 1305) | def test_normalize_dataframe_new_time_index_in_base_dataframe_error_chec...
function test_normalize_dataframe_new_time_index_in_column_list_error_check (line 1316) | def test_normalize_dataframe_new_time_index_in_column_list_error_check(es):
function test_normalize_dataframe_new_time_index_copy_success_check (line 1329) | def test_normalize_dataframe_new_time_index_copy_success_check(es):
function test_normalize_dataframe_new_time_index_additional_success_check (line 1340) | def test_normalize_dataframe_new_time_index_additional_success_check(es):
function normalize_es (line 1352) | def normalize_es():
function test_normalize_time_index_from_none (line 1369) | def test_normalize_time_index_from_none(normalize_es):
function test_raise_error_if_dupicate_additional_columns_passed (line 1385) | def test_raise_error_if_dupicate_additional_columns_passed(es):
function test_raise_error_if_dupicate_copy_columns_passed (line 1398) | def test_raise_error_if_dupicate_copy_columns_passed(es):
function test_normalize_dataframe_copies_logical_types (line 1411) | def test_normalize_dataframe_copies_logical_types(es):
function test_make_time_index_keeps_original_sorting (line 1446) | def test_make_time_index_keeps_original_sorting():
function test_normalize_dataframe_new_time_index (line 1471) | def test_normalize_dataframe_new_time_index(es):
function test_normalize_dataframe_same_index (line 1488) | def test_normalize_dataframe_same_index(es):
function test_secondary_time_index (line 1514) | def test_secondary_time_index(es):
function test_sizeof (line 1532) | def test_sizeof(es):
function test_construct_without_id (line 1541) | def test_construct_without_id():
function test_repr_without_id (line 1545) | def test_repr_without_id():
function test_getitem_without_id (line 1550) | def test_getitem_without_id():
function test_metadata_without_id (line 1556) | def test_metadata_without_id():
function datetime3 (line 1562) | def datetime3():
function test_datetime64_conversion (line 1566) | def test_datetime64_conversion(datetime3):
function index_df (line 1583) | def index_df():
function test_same_index_values (line 1593) | def test_same_index_values(index_df):
function test_use_time_index (line 1626) | def test_use_time_index(index_df):
function test_normalize_with_datetime_time_index (line 1654) | def test_normalize_with_datetime_time_index(es):
function test_normalize_with_numeric_time_index (line 1667) | def test_normalize_with_numeric_time_index(int_es):
function test_normalize_with_invalid_time_index (line 1679) | def test_normalize_with_invalid_time_index(es):
function test_entityset_init (line 1691) | def test_entityset_init():
function test_add_interesting_values_specified_vals (line 1734) | def test_add_interesting_values_specified_vals(es):
function test_add_interesting_values_vals_specified_without_dataframe_name (line 1747) | def test_add_interesting_values_vals_specified_without_dataframe_name(es):
function test_add_interesting_values_single_dataframe (line 1756) | def test_add_interesting_values_single_dataframe(es):
function test_add_interesting_values_multiple_dataframes (line 1776) | def test_add_interesting_values_multiple_dataframes(es):
function test_add_interesting_values_verbose_output (line 1796) | def test_add_interesting_values_verbose_output(caplog):
function test_entityset_equality (line 1813) | def test_entityset_equality(es):
function test_entityset_dataframe_dict_and_relationship_equality (line 1862) | def test_entityset_dataframe_dict_and_relationship_equality(es):
function test_entityset_id_equality (line 1949) | def test_entityset_id_equality():
function test_entityset_time_type_equality (line 1958) | def test_entityset_time_type_equality():
function test_entityset_deep_equality (line 1973) | def test_entityset_deep_equality(es):
function test_deepcopy_entityset (line 2030) | def test_deepcopy_entityset(make_es):
function test_deepcopy_entityset_woodwork_changes (line 2048) | def test_deepcopy_entityset_woodwork_changes(es):
function test_deepcopy_entityset_featuretools_changes (line 2061) | def test_deepcopy_entityset_featuretools_changes(es):
function test_es__getstate__key_unique (line 2079) | def test_es__getstate__key_unique(es):
function test_es_pickling (line 2083) | def test_es_pickling(es):
function test_empty_es_pickling (line 2091) | def test_empty_es_pickling():
function test_setitem (line 2100) | def test_setitem(add_dataframe):
function test_latlong_nan_normalization (line 2108) | def test_latlong_nan_normalization(latlong_df):
function test_latlong_nan_normalization_add_dataframe (line 2130) | def test_latlong_nan_normalization_add_dataframe(latlong_df):
FILE: featuretools/tests/entityset_tests/test_es_metadata.py
function test_cannot_re_add_relationships_that_already_exists (line 8) | def test_cannot_re_add_relationships_that_already_exists(es):
function test_add_relationships_convert_type (line 15) | def test_add_relationships_convert_type(es):
function test_get_forward_dataframes (line 22) | def test_get_forward_dataframes(es):
function test_get_backward_dataframes (line 32) | def test_get_backward_dataframes(es):
function test_get_forward_dataframes_deep (line 38) | def test_get_forward_dataframes_deep(es):
function test_get_backward_dataframes_deep (line 54) | def test_get_backward_dataframes_deep(es):
function test_get_forward_relationships (line 61) | def test_get_forward_relationships(es):
function test_get_backward_relationships (line 75) | def test_get_backward_relationships(es):
function test_find_forward_paths (line 87) | def test_find_forward_paths(es):
function test_find_forward_paths_multiple_paths (line 100) | def test_find_forward_paths_multiple_paths(diamond_es):
function test_find_forward_paths_multiple_relationships (line 119) | def test_find_forward_paths_multiple_relationships(games_es):
function employee_df (line 141) | def employee_df():
function test_find_forward_paths_ignores_loops (line 145) | def test_find_forward_paths_ignores_loops(employee_df):
function test_find_backward_paths (line 155) | def test_find_backward_paths(es):
function test_find_backward_paths_multiple_paths (line 168) | def test_find_backward_paths_multiple_paths(diamond_es):
function test_find_backward_paths_multiple_relationships (line 187) | def test_find_backward_paths_multiple_relationships(games_es):
function test_has_unique_path (line 208) | def test_has_unique_path(diamond_es):
function test_raise_key_error_missing_dataframe (line 213) | def test_raise_key_error_missing_dataframe(es):
function test_add_parent_not_index_column (line 224) | def test_add_parent_not_index_column(es):
FILE: featuretools/tests/entityset_tests/test_last_time_index.py
function values_es (line 11) | def values_es(es):
function true_values_lti (line 23) | def true_values_lti():
function true_sessions_lti (line 43) | def true_sessions_lti():
function wishlist_df (line 58) | def wishlist_df():
function extra_session_df (line 86) | def extra_session_df(es):
class TestLastTimeIndex (line 94) | class TestLastTimeIndex(object):
method test_leaf (line 95) | def test_leaf(self, es):
method test_leaf_no_time_index (line 108) | def test_leaf_no_time_index(self, es):
method test_parent (line 121) | def test_parent(self, values_es, true_values_lti):
method test_parent_some_missing (line 131) | def test_parent_some_missing(self, values_es, true_values_lti):
method test_parent_no_time_index (line 159) | def test_parent_no_time_index(self, es, true_sessions_lti):
method test_parent_no_time_index_missing (line 169) | def test_parent_no_time_index_missing(
method test_multiple_children (line 190) | def test_multiple_children(self, es, wishlist_df, true_sessions_lti):
method test_multiple_children_right_missing (line 218) | def test_multiple_children_right_missing(self, es, wishlist_df, true_s...
method test_multiple_children_left_missing (line 249) | def test_multiple_children_left_missing(
method test_multiple_children_all_combined (line 297) | def test_multiple_children_all_combined(
method test_multiple_children_both_missing (line 347) | def test_multiple_children_both_missing(
method test_grandparent (line 388) | def test_grandparent(self, es):
FILE: featuretools/tests/entityset_tests/test_plotting.py
function simple_es (line 12) | def simple_es():
function test_returns_digraph_object (line 19) | def test_returns_digraph_object(es):
function test_saving_png_file (line 25) | def test_saving_png_file(es, tmp_path):
function test_missing_file_extension (line 34) | def test_missing_file_extension(es):
function test_invalid_format (line 43) | def test_invalid_format(es):
function test_multiple_rows (line 52) | def test_multiple_rows(es):
function test_single_row (line 59) | def test_single_row(simple_es):
FILE: featuretools/tests/entityset_tests/test_relationship.py
function test_relationship_path (line 4) | def test_relationship_path(es):
function test_relationship_path_name (line 26) | def test_relationship_path_name(es):
function test_relationship_path_dataframes (line 48) | def test_relationship_path_dataframes(es):
function test_names_when_multiple_relationships_between_dataframes (line 78) | def test_names_when_multiple_relationships_between_dataframes(games_es):
function test_names_when_no_other_relationship_between_dataframes (line 84) | def test_names_when_no_other_relationship_between_dataframes(home_games_...
function test_relationship_serialization (line 90) | def test_relationship_serialization(es):
FILE: featuretools/tests/entityset_tests/test_serialization.py
function test_entityset_description (line 30) | def test_entityset_description(es):
function test_all_ww_logical_types (line 36) | def test_all_ww_logical_types():
function test_with_custom_ww_logical_type (line 53) | def test_with_custom_ww_logical_type():
function test_serialize_invalid_formats (line 81) | def test_serialize_invalid_formats(es, tmp_path):
function test_empty_dataframe (line 88) | def test_empty_dataframe(es):
function test_to_csv (line 96) | def test_to_csv(es, tmp_path):
function test_to_csv_interesting_values (line 106) | def test_to_csv_interesting_values(es, tmp_path):
function test_to_csv_manual_interesting_values (line 113) | def test_to_csv_manual_interesting_values(es, tmp_path):
function test_to_pickle (line 126) | def test_to_pickle(es, tmp_path):
function test_to_pickle_interesting_values (line 134) | def test_to_pickle_interesting_values(es, tmp_path):
function test_to_pickle_manual_interesting_values (line 141) | def test_to_pickle_manual_interesting_values(es, tmp_path):
function test_to_parquet (line 154) | def test_to_parquet(es, tmp_path):
function test_to_parquet_manual_interesting_values (line 164) | def test_to_parquet_manual_interesting_values(es, tmp_path):
function test_to_parquet_interesting_values (line 177) | def test_to_parquet_interesting_values(es, tmp_path):
function test_to_parquet_with_lti (line 184) | def test_to_parquet_with_lti(tmp_path, mock_customer):
function test_to_pickle_id_none (line 191) | def test_to_pickle_id_none(tmp_path):
function s3_client (line 200) | def s3_client():
function s3_bucket (line 212) | def s3_bucket(s3_client, region="us-east-2"):
function make_public (line 223) | def make_public(s3_client, s3_bucket):
function test_serialize_s3_csv (line 229) | def test_serialize_s3_csv(es, s3_client, s3_bucket, profile_name):
function test_serialize_s3_pickle (line 237) | def test_serialize_s3_pickle(es, s3_client, s3_bucket, profile_name):
function test_serialize_s3_parquet (line 245) | def test_serialize_s3_parquet(es, s3_client, s3_bucket, profile_name):
function test_s3_test_profile (line 252) | def test_s3_test_profile(es, s3_client, s3_bucket, setup_test_profile):
function test_serialize_url_csv (line 259) | def test_serialize_url_csv(es):
function test_serialize_subdirs_not_removed (line 265) | def test_serialize_subdirs_not_removed(es, tmp_path):
function test_deserialize_local_tar (line 287) | def test_deserialize_local_tar(es):
function test_deserialize_errors_if_python_version_unsafe (line 296) | def test_deserialize_errors_if_python_version_unsafe(mock_inspect, es):
function test_deserialize_url_csv (line 307) | def test_deserialize_url_csv(es):
function test_deserialize_s3_csv (line 312) | def test_deserialize_s3_csv(es):
function test_operations_invalidate_metadata (line 317) | def test_operations_invalidate_metadata(es):
function test_reset_metadata (line 361) | def test_reset_metadata(es):
function test_later_schema_version (line 373) | def test_later_schema_version(es, caplog, hardcoded_schema_version, warns):
function test_earlier_schema_version (line 395) | def test_earlier_schema_version(
function _check_schema_version (line 417) | def _check_schema_version(version, es, warning_text, caplog, warning_typ...
FILE: featuretools/tests/entityset_tests/test_timedelta.py
function test_timedelta_equality (line 11) | def test_timedelta_equality():
function test_singular (line 16) | def test_singular():
function test_delta_with_observations (line 21) | def test_delta_with_observations(es):
function test_delta_with_time_unit_matches_pandas (line 40) | def test_delta_with_time_unit_matches_pandas(es):
function test_check_timedelta (line 64) | def test_check_timedelta(es):
function test_check_pd_timedelta (line 87) | def test_check_pd_timedelta(es):
function test_string_timedelta_args (line 93) | def test_string_timedelta_args():
function test_feature_takes_timedelta_string (line 102) | def test_feature_takes_timedelta_string(es):
function test_deltas_week (line 112) | def test_deltas_week(es):
function test_relative_year (line 125) | def test_relative_year():
function test_serialization (line 135) | def test_serialization():
function test_relative_month (line 169) | def test_relative_month():
function test_has_multiple_units (line 187) | def test_has_multiple_units():
function test_pd_dateoffset_to_timedelta (line 196) | def test_pd_dateoffset_to_timedelta():
function test_pd_dateoffset_to_timedelta_math (line 216) | def test_pd_dateoffset_to_timedelta_math():
FILE: featuretools/tests/entityset_tests/test_ww_es.py
function test_empty_es (line 19) | def test_empty_es():
function df (line 28) | def df():
function test_init_es_with_dataframe (line 34) | def test_init_es_with_dataframe(df):
function test_init_es_with_woodwork_table_same_name (line 45) | def test_init_es_with_woodwork_table_same_name(df):
function test_init_es_with_woodwork_table_diff_name_error (line 62) | def test_init_es_with_woodwork_table_diff_name_error(df):
function test_init_es_with_dataframe_and_params (line 69) | def test_init_es_with_dataframe_and_params(df):
function test_init_es_with_multiple_dataframes (line 93) | def test_init_es_with_multiple_dataframes(df):
function test_add_dataframe_to_es (line 117) | def test_add_dataframe_to_es(df):
function test_change_es_dataframe_schema (line 138) | def test_change_es_dataframe_schema(df):
function test_init_es_with_relationships (line 148) | def test_init_es_with_relationships(df):
function dates_df (line 171) | def dates_df():
function test_add_secondary_time_index (line 214) | def test_add_secondary_time_index(dates_df):
function test_time_type_check_order (line 231) | def test_time_type_check_order(dates_df):
function test_add_time_index_through_woodwork_different_type (line 249) | def test_add_time_index_through_woodwork_different_type(dates_df):
function test_init_with_mismatched_time_types (line 277) | def test_init_with_mismatched_time_types(dates_df):
function test_int_double_time_type (line 295) | def test_int_double_time_type(dates_df):
function test_normalize_dataframe (line 314) | def test_normalize_dataframe():
function test_replace_dataframe (line 356) | def test_replace_dataframe():
function test_add_last_time_index (line 396) | def test_add_last_time_index(es):
function test_lti_already_has_last_time_column_name (line 407) | def test_lti_already_has_last_time_column_name(es):
function test_numeric_es_last_time_index_logical_type (line 424) | def test_numeric_es_last_time_index_logical_type(int_es):
function test_datetime_es_last_time_index_logical_type (line 434) | def test_datetime_es_last_time_index_logical_type(es):
function test_dataframe_without_name (line 444) | def test_dataframe_without_name(es):
function test_dataframe_with_name_parameter (line 456) | def test_dataframe_with_name_parameter(es):
function test_woodwork_dataframe_without_name_errors (line 472) | def test_woodwork_dataframe_without_name_errors(es):
function test_woodwork_dataframe_with_name (line 485) | def test_woodwork_dataframe_with_name(es):
function test_woodwork_dataframe_ignore_conflicting_name_parameter_warning (line 498) | def test_woodwork_dataframe_ignore_conflicting_name_parameter_warning(es):
function test_woodwork_dataframe_same_name_parameter (line 513) | def test_woodwork_dataframe_same_name_parameter(es):
function test_extra_woodwork_params (line 526) | def test_extra_woodwork_params(es):
function test_replace_dataframe_errors (line 556) | def test_replace_dataframe_errors(es):
function test_replace_dataframe_already_sorted (line 569) | def test_replace_dataframe_already_sorted(es):
function test_replace_dataframe_invalid_schema (line 603) | def test_replace_dataframe_invalid_schema(es):
function test_replace_dataframe_mismatched_index (line 612) | def test_replace_dataframe_mismatched_index(es):
function test_replace_dataframe_different_dtypes (line 622) | def test_replace_dataframe_different_dtypes(es):
function latlong_df (line 641) | def latlong_df():
function test_replace_dataframe_data_transformation (line 656) | def test_replace_dataframe_data_transformation(latlong_df):
function test_replace_dataframe_column_order (line 680) | def test_replace_dataframe_column_order(es):
function test_replace_dataframe_different_woodwork_initialized (line 695) | def test_replace_dataframe_different_woodwork_initialized(es):
function test_replace_dataframe_and_min_last_time_index (line 724) | def test_replace_dataframe_and_min_last_time_index(es):
function test_replace_dataframe_dont_recalculate_last_time_index_present (line 750) | def test_replace_dataframe_dont_recalculate_last_time_index_present(es):
function test_replace_dataframe_dont_recalculate_last_time_index_not_present (line 772) | def test_replace_dataframe_dont_recalculate_last_time_index_not_present(...
function test_replace_dataframe_recalculate_last_time_index_not_present (line 794) | def test_replace_dataframe_recalculate_last_time_index_not_present(es):
function test_replace_dataframe_recalculate_last_time_index_present (line 818) | def test_replace_dataframe_recalculate_last_time_index_present(es):
function test_normalize_dataframe_loses_column_metadata (line 842) | def test_normalize_dataframe_loses_column_metadata(es):
function test_normalize_ww_init (line 877) | def test_normalize_ww_init():
FILE: featuretools/tests/entry_point_tests/add-ons/featuretools_primitives/featuretools_primitives/existing_primitive.py
class Sum (line 4) | class Sum(AggregationPrimitive):
FILE: featuretools/tests/entry_point_tests/add-ons/featuretools_primitives/featuretools_primitives/new_primitive.py
class NewPrimitive (line 4) | class NewPrimitive(TransformPrimitive):
FILE: featuretools/tests/entry_point_tests/test_plugin.py
function test_plugin_warning (line 8) | def test_plugin_warning():
FILE: featuretools/tests/entry_point_tests/test_primitives.py
function test_entry_point (line 9) | def test_entry_point():
FILE: featuretools/tests/entry_point_tests/utils.py
function _get_path_to_add_ons (line 6) | def _get_path_to_add_ons(*args):
function _python (line 11) | def _python(*args):
function _install_featuretools_plugin (line 16) | def _install_featuretools_plugin():
function _uninstall_featuretools_plugin (line 21) | def _uninstall_featuretools_plugin():
function _install_featuretools_primitives (line 25) | def _install_featuretools_primitives():
function _uninstall_featuretools_primitives (line 30) | def _uninstall_featuretools_primitives():
function _import_featuretools (line 34) | def _import_featuretools(level=None):
FILE: featuretools/tests/feature_discovery/test_convertors.py
function test_convert_featurebase_list_to_feature_list (line 31) | def test_convert_featurebase_list_to_feature_list():
function test_origin_feature_to_featurebase (line 89) | def test_origin_feature_to_featurebase():
function test_stacked_feature_to_featurebase (line 111) | def test_stacked_feature_to_featurebase():
function test_multi_output_to_featurebase (line 140) | def test_multi_output_to_featurebase():
function test_stacking_on_multioutput_to_featurebase (line 192) | def test_stacking_on_multioutput_to_featurebase():
FILE: featuretools/tests/feature_discovery/test_feature_collection.py
function test_to_keys_method (line 73) | def test_to_keys_method(feature_args, expected):
function test_feature_collection_hashing (line 81) | def test_feature_collection_hashing():
FILE: featuretools/tests/feature_discovery/test_feature_discovery.py
class MultiOutputPrimitiveForTest (line 51) | class MultiOutputPrimitiveForTest(TransformPrimitive):
class DoublePrimitiveForTest (line 58) | class DoublePrimitiveForTest(TransformPrimitive):
function test_column_schema_to_keys (line 75) | def test_column_schema_to_keys(column_schema, expected):
function test_index_input_set (line 94) | def test_index_input_set(column_list, expected):
function test_get_features (line 159) | def test_get_features(feature_args, input_set, commutative, expected):
function test_get_matching_features (line 244) | def test_get_matching_features(feature_args, primitive, expected):
function test_generate_features_from_primitives (line 275) | def test_generate_features_from_primitives(col_defs, primitives, expected):
function test_compare_dfs (line 330) | def test_compare_dfs(col_defs, primitives):
function test_generate_features_from_primitives_inputs (line 357) | def test_generate_features_from_primitives_inputs():
FILE: featuretools/tests/feature_discovery/test_type_defs.py
function test_feature_type_equality (line 28) | def test_feature_type_equality():
function test_feature_type_assertions (line 74) | def test_feature_type_assertions():
function test_feature_to_dict (line 91) | def test_feature_to_dict():
function test_feature_hash (line 118) | def test_feature_hash():
function test_feature_forced_name (line 155) | def test_feature_forced_name():
function test_feature_collection_to_dict (line 177) | def test_feature_collection_to_dict():
function test_feature_collection_from_dict (line 236) | def test_feature_collection_from_dict():
function test_feature_collection_serialization_roundtrip (line 299) | def test_feature_collection_serialization_roundtrip():
function test_lite_feature_assertions (line 361) | def test_lite_feature_assertions():
function test_lite_feature_to_column_schema (line 420) | def test_lite_feature_to_column_schema():
function test_lite_feature_to_dependent_primitives (line 435) | def test_lite_feature_to_dependent_primitives():
FILE: featuretools/tests/primitive_tests/aggregation_primitive_tests/test_agg_primitives.py
function test_nmostcommon_categorical (line 47) | def test_nmostcommon_categorical():
function test_agg_primitives_can_init_without_params (line 64) | def test_agg_primitives_can_init_without_params():
function test_trend_works_with_different_input_dtypes (line 70) | def test_trend_works_with_different_input_dtypes():
function test_percent_true_boolean (line 82) | def test_percent_true_boolean():
class TestAverageCountPerUnique (line 88) | class TestAverageCountPerUnique(PrimitiveTestBase):
method test_percent_unique (line 92) | def test_percent_unique(self):
method test_nans (line 96) | def test_nans(self):
method test_empty_string (line 104) | def test_empty_string(self):
method test_with_featuretools (line 109) | def test_with_featuretools(self, es):
class TestVariance (line 116) | class TestVariance(PrimitiveTestBase):
method test_regular (line 119) | def test_regular(self):
method test_single (line 123) | def test_single(self):
method test_double (line 127) | def test_double(self):
method test_empty (line 131) | def test_empty(self):
method test_nan (line 135) | def test_nan(self):
method test_allnan (line 142) | def test_allnan(self):
class TestFirstLastTimeDelta (line 150) | class TestFirstLastTimeDelta(PrimitiveTestBase):
method test_first_last_time_delta (line 155) | def test_first_last_time_delta(self):
method test_with_nans (line 159) | def test_with_nans(self):
method test_with_featuretools (line 165) | def test_with_featuretools(self, es):
class TestEntropy (line 172) | class TestEntropy(PrimitiveTestBase):
method test_regular (line 179) | def test_regular(self, dtype):
method test_empty (line 189) | def test_empty(self, dtype):
method test_args (line 199) | def test_args(self, dtype):
method test_with_featuretools (line 209) | def test_with_featuretools(self, es):
class TestKurtosis (line 216) | class TestKurtosis(PrimitiveTestBase):
method test_regular (line 223) | def test_regular(self, dtype):
method test_nan (line 250) | def test_nan(self):
method test_empty (line 260) | def test_empty(self, dtype):
method test_inf (line 266) | def test_inf(self):
method test_arg (line 277) | def test_arg(self):
method test_error (line 292) | def test_error(self):
method test_with_featuretools (line 296) | def test_with_featuretools(self, es):
class TestNumZeroCrossings (line 303) | class TestNumZeroCrossings(PrimitiveTestBase):
method test_nan (line 306) | def test_nan(self):
method test_empty (line 314) | def test_empty(self):
method test_inf (line 321) | def test_inf(self):
method test_zeros (line 334) | def test_zeros(self):
method test_regular (line 347) | def test_regular(self):
method test_with_featuretools (line 360) | def test_with_featuretools(self, es):
class TestNumTrueSinceLastFalse (line 367) | class TestNumTrueSinceLastFalse(PrimitiveTestBase):
method test_regular (line 370) | def test_regular(self):
method test_regular_end_in_false (line 377) | def test_regular_end_in_false(self):
method test_no_false (line 384) | def test_no_false(self):
method test_all_false (line 389) | def test_all_false(self):
method test_nan (line 396) | def test_nan(self):
method test_all_nan (line 403) | def test_all_nan(self):
method test_with_featuretools (line 408) | def test_with_featuretools(self, es):
class TestNumFalseSinceLastTrue (line 415) | class TestNumFalseSinceLastTrue(PrimitiveTestBase):
method test_regular (line 418) | def test_regular(self):
method test_regular_end_in_true (line 425) | def test_regular_end_in_true(self):
method test_no_true (line 432) | def test_no_true(self):
method test_all_true (line 437) | def test_all_true(self):
method test_nan (line 444) | def test_nan(self):
method test_all_nan (line 451) | def test_all_nan(self):
method test_numeric_and_string_input (line 456) | def test_numeric_and_string_input(self):
method test_with_featuretools (line 463) | def test_with_featuretools(self, es):
class TestNumPeaks (line 470) | class TestNumPeaks(PrimitiveTestBase):
method test_negative_and_positive_nums (line 477) | def test_negative_and_positive_nums(self, dtype):
method test_plateu (line 488) | def test_plateu(self, dtype):
method test_regular (line 552) | def test_regular(self, dtype):
method test_no_peak (line 561) | def test_no_peak(self, dtype):
method test_too_small_data (line 570) | def test_too_small_data(self, dtype):
method test_nans (line 582) | def test_nans(self, dtype):
method test_with_featuretools (line 611) | def test_with_featuretools(self, es):
class TestDateFirstEvent (line 618) | class TestDateFirstEvent(PrimitiveTestBase):
method test_regular (line 621) | def test_regular(self):
method test_nat (line 636) | def test_nat(self):
method test_empty (line 651) | def test_empty(self):
method test_with_featuretools (line 657) | def test_with_featuretools(self, es):
method test_serialize (line 663) | def test_serialize(self, es):
class TestMinCount (line 667) | class TestMinCount(PrimitiveTestBase):
method test_nan (line 670) | def test_nan(self):
method test_inf (line 676) | def test_inf(self):
method test_regular (line 682) | def test_regular(self):
method test_skipna (line 693) | def test_skipna(self):
method test_ninf (line 699) | def test_ninf(self):
method test_with_featuretools (line 705) | def test_with_featuretools(self, es):
class TestMaxCount (line 712) | class TestMaxCount(PrimitiveTestBase):
method test_nan (line 715) | def test_nan(self):
method test_inf (line 721) | def test_inf(self):
method test_regular (line 727) | def test_regular(self):
method test_skipna (line 738) | def test_skipna(self):
method test_ninf (line 744) | def test_ninf(self):
method test_with_featuretools (line 750) | def test_with_featuretools(self, es):
class TestMaxMinDelta (line 757) | class TestMaxMinDelta(PrimitiveTestBase):
method test_max_min_delta (line 761) | def test_max_min_delta(self):
method test_nans (line 765) | def test_nans(self):
method test_with_featuretools (line 773) | def test_with_featuretools(self, es):
class TestMedianCount (line 780) | class TestMedianCount(PrimitiveTestBase):
method test_regular (line 783) | def test_regular(self):
method test_nans (line 789) | def test_nans(self):
method test_with_featuretools (line 798) | def test_with_featuretools(self, es):
class TestNMostCommonFrequency (line 805) | class TestNMostCommonFrequency(PrimitiveTestBase):
method test_regular (line 808) | def test_regular(self):
method test_n_larger_than_len (line 828) | def test_n_larger_than_len(self):
method test_skipna (line 846) | def test_skipna(self):
method test_with_featuretools (line 854) | def test_with_featuretools(self, es):
method test_with_featuretools_args (line 866) | def test_with_featuretools_args(self, es):
method test_serialize (line 878) | def test_serialize(self, es):
class TestNUniqueDays (line 886) | class TestNUniqueDays(PrimitiveTestBase):
method test_two_years (line 889) | def test_two_years(self):
method test_leap_year (line 894) | def test_leap_year(self):
method test_ten_years (line 899) | def test_ten_years(self):
method test_distinct_dt (line 904) | def test_distinct_dt(self):
method test_NaT (line 917) | def test_NaT(self):
method test_with_featuretools (line 923) | def test_with_featuretools(self, es):
class TestNUniqueDaysOfCalendarYear (line 930) | class TestNUniqueDaysOfCalendarYear(PrimitiveTestBase):
method test_two_years (line 933) | def test_two_years(self):
method test_leap_year (line 938) | def test_leap_year(self):
method test_ten_years (line 943) | def test_ten_years(self):
method test_distinct_dt (line 948) | def test_distinct_dt(self):
method test_NaT (line 961) | def test_NaT(self):
method test_with_featuretools (line 967) | def test_with_featuretools(self, es):
class TestNUniqueDaysOfMonth (line 974) | class TestNUniqueDaysOfMonth(PrimitiveTestBase):
method test_two_days (line 977) | def test_two_days(self):
method test_one_year (line 982) | def test_one_year(self):
method test_leap_year (line 987) | def test_leap_year(self):
method test_distinct_dt (line 992) | def test_distinct_dt(self):
method test_NaT (line 1005) | def test_NaT(self):
method test_with_featuretools (line 1011) | def test_with_featuretools(self, es):
class TestNUniqueMonths (line 1018) | class TestNUniqueMonths(PrimitiveTestBase):
method test_two_days (line 1021) | def test_two_days(self):
method test_ten_years (line 1026) | def test_ten_years(self):
method test_distinct_dt (line 1031) | def test_distinct_dt(self):
method test_NaT (line 1044) | def test_NaT(self):
method test_with_featuretools (line 1050) | def test_with_featuretools(self, es):
class TestNUniqueWeeks (line 1057) | class TestNUniqueWeeks(PrimitiveTestBase):
method test_same_week (line 1060) | def test_same_week(self):
method test_ten_years (line 1065) | def test_ten_years(self):
method test_distinct_dt (line 1070) | def test_distinct_dt(self):
method test_NaT (line 1084) | def test_NaT(self):
method test_with_featuretools (line 1090) | def test_with_featuretools(self, es):
class TestHasNoDuplicates (line 1097) | class TestHasNoDuplicates(PrimitiveTestBase):
method test_regular (line 1100) | def test_regular(self):
method test_nan (line 1121) | def test_nan(self):
method test_with_featuretools (line 1137) | def test_with_featuretools(self, es):
class TestIsMonotonicallyDecreasing (line 1151) | class TestIsMonotonicallyDecreasing(PrimitiveTestBase):
method test_monotonically_decreasing (line 1154) | def test_monotonically_decreasing(self):
method test_monotonically_increasing (line 1159) | def test_monotonically_increasing(self):
method test_non_monotonic (line 1164) | def test_non_monotonic(self):
method test_weakly_decreasing (line 1169) | def test_weakly_decreasing(self):
method test_nan (line 1174) | def test_nan(self):
method test_with_featuretools (line 1183) | def test_with_featuretools(self, es):
class TestIsMonotonicallyIncreasing (line 1190) | class TestIsMonotonicallyIncreasing(PrimitiveTestBase):
method test_monotonically_increasing (line 1193) | def test_monotonically_increasing(self):
method test_monotonically_decreasing (line 1198) | def test_monotonically_decreasing(self):
method test_non_monotonic (line 1203) | def test_non_monotonic(self):
method test_weakly_increasing (line 1208) | def test_weakly_increasing(self):
method test_nan (line 1213) | def test_nan(self):
method test_with_featuretools (line 1222) | def test_with_featuretools(self, es):
FILE: featuretools/tests/primitive_tests/aggregation_primitive_tests/test_count_aggregation_primitives.py
class TestCountAboveMean (line 17) | class TestCountAboveMean(PrimitiveTestBase):
method test_regular (line 20) | def test_regular(self):
method test_nan_without_ignore_nan (line 33) | def test_nan_without_ignore_nan(self):
method test_nan_with_ignore_nan (line 46) | def test_nan_with_ignore_nan(self):
method test_inf (line 65) | def test_inf(self):
class TestCountGreaterThan (line 89) | class TestCountGreaterThan(PrimitiveTestBase):
method compare_results (line 92) | def compare_results(self, data, thresholds, results):
method test_regular (line 99) | def test_regular(self):
method test_edges (line 105) | def test_edges(self):
method test_nans (line 111) | def test_nans(self):
class TestCountInsideNthSTD (line 118) | class TestCountInsideNthSTD:
method test_normal_distribution (line 121) | def test_normal_distribution(self):
method test_poisson_distribution (line 157) | def test_poisson_distribution(self):
method test_nan (line 193) | def test_nan(self):
method test_negative_n (line 232) | def test_negative_n(self):
class TestCountInsideRange (line 237) | class TestCountInsideRange(PrimitiveTestBase):
method test_integer_range (line 240) | def test_integer_range(self):
method test_float_range (line 255) | def test_float_range(self):
method test_nan (line 270) | def test_nan(self):
method test_inf (line 282) | def test_inf(self):
class TestCountLessThan (line 302) | class TestCountLessThan(PrimitiveTestBase):
method compare_answers (line 305) | def compare_answers(self, data, thresholds, answers):
method test_regular (line 312) | def test_regular(self):
method test_edges (line 318) | def test_edges(self):
method test_nans (line 324) | def test_nans(self):
class TestCountOutsideNthSTD (line 331) | class TestCountOutsideNthSTD(PrimitiveTestBase):
method test_normal_distribution (line 334) | def test_normal_distribution(self):
method test_poisson_distribution (line 438) | def test_poisson_distribution(self):
method test_nan (line 472) | def test_nan(self):
method test_negative_n (line 510) | def test_negative_n(self):
class TestCountOutsideRange (line 515) | class TestCountOutsideRange(PrimitiveTestBase):
method test_integer_range (line 518) | def test_integer_range(self):
method test_float_range (line 533) | def test_float_range(self):
method test_nan (line 548) | def test_nan(self):
method test_inf (line 559) | def test_inf(self):
FILE: featuretools/tests/primitive_tests/aggregation_primitive_tests/test_max_consecutive.py
class TestMaxConsecutiveFalse (line 14) | class TestMaxConsecutiveFalse:
method test_regular (line 15) | def test_regular(self):
method test_all_true (line 21) | def test_all_true(self):
method test_all_false (line 27) | def test_all_false(self):
class TestMaxConsecutiveTrue (line 34) | class TestMaxConsecutiveTrue:
method test_regular (line 35) | def test_regular(self):
method test_all_true (line 41) | def test_all_true(self):
method test_all_false (line 47) | def test_all_false(self):
class TestMaxConsecutiveNegatives (line 55) | class TestMaxConsecutiveNegatives:
method test_regular (line 56) | def test_regular(self, dtype):
method test_all_int (line 64) | def test_all_int(self, dtype):
method test_all_float (line 70) | def test_all_float(self, dtype):
method test_with_nan (line 78) | def test_with_nan(self, dtype):
method test_with_nan_skipna (line 86) | def test_with_nan_skipna(self, dtype):
method test_all_nan (line 94) | def test_all_nan(self, dtype):
method test_all_nan_skipna (line 102) | def test_all_nan_skipna(self, dtype):
class TestMaxConsecutivePositives (line 112) | class TestMaxConsecutivePositives:
method test_regular (line 113) | def test_regular(self, dtype):
method test_all_int (line 121) | def test_all_int(self, dtype):
method test_all_float (line 127) | def test_all_float(self, dtype):
method test_with_nan (line 135) | def test_with_nan(self, dtype):
method test_with_nan_skipna (line 143) | def test_with_nan_skipna(self, dtype):
method test_all_nan (line 151) | def test_all_nan(self, dtype):
method test_all_nan_skipna (line 159) | def test_all_nan_skipna(self, dtype):
class TestMaxConsecutiveZeros (line 169) | class TestMaxConsecutiveZeros:
method test_regular (line 170) | def test_regular(self, dtype):
method test_all_int (line 178) | def test_all_int(self, dtype):
method test_all_float (line 184) | def test_all_float(self, dtype):
method test_with_nan (line 192) | def test_with_nan(self, dtype):
method test_with_nan_skipna (line 200) | def test_with_nan_skipna(self, dtype):
method test_all_nan (line 208) | def test_all_nan(self, dtype):
method test_all_nan_skipna (line 216) | def test_all_nan_skipna(self, dtype):
FILE: featuretools/tests/primitive_tests/aggregation_primitive_tests/test_num_consecutive.py
class TestNumConsecutiveGreaterMean (line 7) | class TestNumConsecutiveGreaterMean:
method test_continuous_range (line 10) | def test_continuous_range(self):
method test_subsequence_in_middle (line 17) | def test_subsequence_in_middle(self):
method test_subsequence_at_start (line 47) | def test_subsequence_at_start(self):
method test_subsequence_at_end (line 77) | def test_subsequence_at_end(self):
method test_nan (line 107) | def test_nan(self):
method test_inf (line 122) | def test_inf(self):
class TestNumConsecutiveLessMean (line 139) | class TestNumConsecutiveLessMean:
method test_continuous_range (line 142) | def test_continuous_range(self):
method test_subsequence_in_middle (line 149) | def test_subsequence_in_middle(self):
method test_subsequence_at_start (line 179) | def test_subsequence_at_start(self):
method test_subsequence_at_end (line 209) | def test_subsequence_at_end(self):
method test_nan (line 239) | def test_nan(self):
method test_inf (line 254) | def test_inf(self):
FILE: featuretools/tests/primitive_tests/aggregation_primitive_tests/test_percent_true.py
function test_percent_true_default_value_with_dfs (line 7) | def test_percent_true_default_value_with_dfs():
FILE: featuretools/tests/primitive_tests/aggregation_primitive_tests/test_rolling_primitive.py
function test_rolling_max (line 30) | def test_rolling_max(min_periods, window_length, gap, window_series):
function test_rolling_min (line 70) | def test_rolling_min(min_periods, window_length, gap, window_series):
function test_rolling_mean (line 111) | def test_rolling_mean(min_periods, window_length, gap, window_series):
function test_rolling_std (line 152) | def test_rolling_std(min_periods, window_length, gap, window_series):
function test_rolling_count (line 197) | def test_rolling_count(window_length, gap, window_series):
function test_rolling_count_primitive_min_periods_nans (line 232) | def test_rolling_count_primitive_min_periods_nans(
function test_rolling_count_with_no_gap (line 255) | def test_rolling_count_with_no_gap(
function test_rolling_trend (line 315) | def test_rolling_trend(window_length, gap, expected_vals, window_series):
function test_rolling_trend_window_length_less_than_three (line 323) | def test_rolling_trend_window_length_less_than_three(window_series):
function test_rolling_primitives_non_uniform (line 342) | def test_rolling_primitives_non_uniform(primitive):
function test_rolling_std_non_uniform (line 367) | def test_rolling_std_non_uniform():
function test_rolling_trend_non_uniform (line 400) | def test_rolling_trend_non_uniform():
function test_rolling_outlier_count (line 440) | def test_rolling_outlier_count(
FILE: featuretools/tests/primitive_tests/aggregation_primitive_tests/test_time_since.py
class TestTimeSinceLastFalse (line 15) | class TestTimeSinceLastFalse:
method test_booleans (line 24) | def test_booleans(self):
method test_booleans_reversed (line 36) | def test_booleans_reversed(self):
method test_no_false (line 49) | def test_no_false(self):
method test_nans (line 55) | def test_nans(self):
method test_empty (line 73) | def test_empty(self):
class TestTimeSinceLastMax (line 86) | class TestTimeSinceLastMax:
method test_primitive_func_1 (line 97) | def test_primitive_func_1(self):
method test_no_max (line 108) | def test_no_max(self):
method test_nans (line 116) | def test_nans(self):
class TestTimeSinceLastMin (line 134) | class TestTimeSinceLastMin:
method test_primitive_func_1 (line 145) | def test_primitive_func_1(self):
method test_no_max (line 156) | def test_no_max(self):
method test_nans (line 164) | def test_nans(self):
class TestTimeSinceLastTrue (line 184) | class TestTimeSinceLastTrue:
method test_primitive_func_1 (line 195) | def test_primitive_func_1(self):
method test_no_true (line 206) | def test_no_true(self):
method test_nans (line 212) | def test_nans(self):
method test_no_cutofftime (line 231) | def test_no_cutofftime(self):
method test_empty (line 237) | def test_empty(self):
FILE: featuretools/tests/primitive_tests/bad_primitive_files/multiple_primitives.py
class CustomMax (line 6) | class CustomMax(AggregationPrimitive):
class CustomSum (line 12) | class CustomSum(AggregationPrimitive):
FILE: featuretools/tests/primitive_tests/natural_language_primitives_tests/test_count_string.py
class TestCountString (line 12) | class TestCountString(PrimitiveTestBase):
method compare (line 15) | def compare(self, primitive_initiated, test_cases, answers):
method test_non_regex_with_no_other_parameters (line 38) | def test_non_regex_with_no_other_parameters(self):
method test_non_regex_ignore_case (line 49) | def test_non_regex_ignore_case(self):
method test_non_regex_ignore_non_alphanumeric (line 70) | def test_non_regex_ignore_non_alphanumeric(self):
method test_non_regex_match_whole_words_only (line 81) | def test_non_regex_match_whole_words_only(self):
method test_non_regex_with_all_others_parameters (line 93) | def test_non_regex_with_all_others_parameters(self):
method test_regex_with_no_other_parameters (line 105) | def test_regex_with_no_other_parameters(self):
method test_regex_with_ignore_case (line 117) | def test_regex_with_ignore_case(self):
method test_regex_with_ignore_non_alphanumeric (line 129) | def test_regex_with_ignore_non_alphanumeric(self):
method test_regex_with_match_whole_words_only (line 141) | def test_regex_with_match_whole_words_only(self):
method test_regex_with_all_other_parameters (line 153) | def test_regex_with_all_other_parameters(self):
method test_overlapping_regex (line 165) | def test_overlapping_regex(self):
method test_the (line 177) | def test_the(self):
method test_nan (line 190) | def test_nan(self):
method test_with_featuretools (line 204) | def test_with_featuretools(self, es):
method test_with_featuretools_nan (line 216) | def test_with_featuretools_nan(self, es):
FILE: featuretools/tests/primitive_tests/natural_language_primitives_tests/test_mean_characters_per_word.py
class TestMeanCharactersPerWord (line 13) | class TestMeanCharactersPerWord(PrimitiveTestBase):
method test_sentences (line 16) | def test_sentences(self):
method test_punctuation (line 30) | def test_punctuation(self):
method test_multiline (line 44) | def test_multiline(self):
method test_nans (line 60) | def test_nans(self, na_value):
method test_all_nans (line 70) | def test_all_nans(self, na_value):
method test_with_featuretools (line 76) | def test_with_featuretools(self, es):
FILE: featuretools/tests/primitive_tests/natural_language_primitives_tests/test_median_word_length.py
class TestMedianWordLength (line 12) | class TestMedianWordLength(PrimitiveTestBase):
method test_delimiter_override (line 15) | def test_delimiter_override(self):
method test_multiline (line 24) | def test_multiline(self):
method test_null (line 36) | def test_null(self):
method test_with_featuretools (line 43) | def test_with_featuretools(self, es):
FILE: featuretools/tests/primitive_tests/natural_language_primitives_tests/test_natural_language_primitives_terminate.py
class TestNaturalLanguagePrimitivesTerminate (line 9) | class TestNaturalLanguagePrimitivesTerminate:
method test_natlang_primitive_does_not_timeout (line 15) | def test_natlang_primitive_does_not_timeout(
FILE: featuretools/tests/primitive_tests/natural_language_primitives_tests/test_num_characters.py
class TestNumCharacters (line 12) | class TestNumCharacters(PrimitiveTestBase):
method test_general (line 15) | def test_general(self):
method test_special_characters_and_whitespace (line 27) | def test_special_characters_and_whitespace(self):
method test_unicode_input (line 33) | def test_unicode_input(self):
method test_null (line 43) | def test_null(self):
method test_with_featuretools (line 54) | def test_with_featuretools(self, es):
FILE: featuretools/tests/primitive_tests/natural_language_primitives_tests/test_num_unique_separators.py
class TestNumUniqueSeparators (line 12) | class TestNumUniqueSeparators(PrimitiveTestBase):
method test_punctuation (line 15) | def test_punctuation(self):
method test_other_delimeters (line 29) | def test_other_delimeters(self):
method test_multiline (line 35) | def test_multiline(self):
method test_nans (line 47) | def test_nans(self):
method test_with_featuretools (line 53) | def test_with_featuretools(self, es):
FILE: featuretools/tests/primitive_tests/natural_language_primitives_tests/test_num_words.py
class TestNumWords (line 12) | class TestNumWords(PrimitiveTestBase):
method test_general (line 15) | def test_general(self):
method test_special_characters_and_whitespace (line 27) | def test_special_characters_and_whitespace(self):
method test_unicode_input (line 33) | def test_unicode_input(self):
method test_contractions (line 43) | def test_contractions(self):
method test_multiple_spaces (line 53) | def test_multiple_spaces(self):
method test_null (line 64) | def test_null(self):
method test_with_featuretools (line 75) | def test_with_featuretools(self, es):
FILE: featuretools/tests/primitive_tests/natural_language_primitives_tests/test_number_of_common_words.py
class TestNumberOfCommonWords (line 12) | class TestNumberOfCommonWords(PrimitiveTestBase):
method test_delimiter_override (line 16) | def test_delimiter_override(self):
method test_multiline (line 32) | def test_multiline(self):
method test_null (line 44) | def test_null(self):
method test_case_insensitive (line 51) | def test_case_insensitive(self):
method test_with_featuretools (line 58) | def test_with_featuretools(self, es):
FILE: featuretools/tests/primitive_tests/natural_language_primitives_tests/test_number_of_hashtags.py
class TestNumberOfHashtags (line 12) | class TestNumberOfHashtags(PrimitiveTestBase):
method test_regular_input (line 15) | def test_regular_input(self):
method test_unicode_input (line 27) | def test_unicode_input(self):
method test_multiline (line 39) | def test_multiline(self):
method test_null (line 51) | def test_null(self):
method test_alphanumeric_and_special (line 58) | def test_alphanumeric_and_special(self):
method test_underscore (line 65) | def test_underscore(self):
method test_with_featuretools (line 72) | def test_with_featuretools(self, es):
FILE: featuretools/tests/primitive_tests/natural_language_primitives_tests/test_number_of_mentions.py
class TestNumberOfMentions (line 12) | class TestNumberOfMentions(PrimitiveTestBase):
method test_regular_input (line 15) | def test_regular_input(self):
method test_unicode_input (line 27) | def test_unicode_input(self):
method test_multiline (line 41) | def test_multiline(self):
method test_null (line 53) | def test_null(self):
method test_alphanumeric_and_special (line 60) | def test_alphanumeric_and_special(self):
method test_underscore (line 67) | def test_underscore(self):
method test_with_featuretools (line 74) | def test_with_featuretools(self, es):
FILE: featuretools/tests/primitive_tests/natural_language_primitives_tests/test_number_of_unique_words.py
class TestNumberOfUniqueWords (line 12) | class TestNumberOfUniqueWords(PrimitiveTestBase):
method test_general (line 15) | def test_general(self):
method test_special_characters_and_whitespace (line 28) | def test_special_characters_and_whitespace(self):
method test_unicode_input (line 35) | def test_unicode_input(self):
method test_contractions (line 46) | def test_contractions(self):
method test_multiline (line 57) | def test_multiline(self):
method test_null (line 69) | def test_null(self):
method test_case_insensitive (line 76) | def test_case_insensitive(self):
method test_with_featuretools (line 83) | def test_with_featuretools(self, es):
FILE: featuretools/tests/primitive_tests/natural_language_primitives_tests/test_number_of_words_in_quotes.py
class TestNumberOfWordsInQuotes (line 13) | class TestNumberOfWordsInQuotes(PrimitiveTestBase):
method test_regular_double_quotes_input (line 16) | def test_regular_double_quotes_input(self):
method test_captures_regular_single_quotes (line 36) | def test_captures_regular_single_quotes(self):
method test_captures_both_single_and_double_quotes (line 53) | def test_captures_both_single_and_double_quotes(self):
method test_unicode_input (line 63) | def test_unicode_input(self):
method test_multiline (line 74) | def test_multiline(self):
method test_raises_error_invalid_args (line 84) | def test_raises_error_invalid_args(self):
method test_null (line 94) | def test_null(self):
method test_with_featuretools (line 100) | def test_with_featuretools(self, es):
FILE: featuretools/tests/primitive_tests/natural_language_primitives_tests/test_punctuation_count.py
class TestPunctuationCount (line 12) | class TestPunctuationCount(PrimitiveTestBase):
method test_punctuation (line 15) | def test_punctuation(self):
method test_multiline (line 29) | def test_multiline(self):
method test_nan (line 40) | def test_nan(self):
method test_with_featuretools (line 46) | def test_with_featuretools(self, es):
FILE: featuretools/tests/primitive_tests/natural_language_primitives_tests/test_title_word_count.py
class TestTitleWordCount (line 12) | class TestTitleWordCount(PrimitiveTestBase):
method test_strings (line 15) | def test_strings(self):
method test_nan (line 28) | def test_nan(self):
method test_with_featuretools (line 34) | def test_with_featuretools(self, es):
FILE: featuretools/tests/primitive_tests/natural_language_primitives_tests/test_total_word_length.py
class TestTotalWordLength (line 12) | class TestTotalWordLength(PrimitiveTestBase):
method test_delimiter_override (line 15) | def test_delimiter_override(self):
method test_multiline (line 24) | def test_multiline(self):
method test_null (line 36) | def test_null(self):
method test_with_featuretools (line 43) | def test_with_featuretools(self, es):
FILE: featuretools/tests/primitive_tests/natural_language_primitives_tests/test_upper_case_count.py
class TestUpperCaseCount (line 12) | class TestUpperCaseCount(PrimitiveTestBase):
method test_strings (line 15) | def test_strings(self):
method test_nan (line 23) | def test_nan(self):
method test_with_featuretools (line 29) | def test_with_featuretools(self, es):
FILE: featuretools/tests/primitive_tests/natural_language_primitives_tests/test_upper_case_word_count.py
class TestUpperCaseWordCount (line 7) | class TestUpperCaseWordCount:
method test_strings (line 10) | def test_strings(self):
method test_nan (line 29) | def test_nan(self):
FILE: featuretools/tests/primitive_tests/natural_language_primitives_tests/test_whitespace_count.py
class TestWhitespaceCount (line 12) | class TestWhitespaceCount(PrimitiveTestBase):
method compare (line 15) | def compare(self, primitive_initiated, test_cases, answers):
method test_strings (line 20) | def test_strings(self):
method test_nan (line 27) | def test_nan(self):
method test_with_featuretools (line 32) | def test_with_featuretools(self, es):
FILE: featuretools/tests/primitive_tests/primitives_to_install/custom_max.py
class CustomMax (line 6) | class CustomMax(AggregationPrimitive):
FILE: featuretools/tests/primitive_tests/primitives_to_install/custom_mean.py
class CustomMean (line 6) | class CustomMean(AggregationPrimitive):
FILE: featuretools/tests/primitive_tests/primitives_to_install/custom_sum.py
class CustomSum (line 6) | class CustomSum(AggregationPrimitive):
FILE: featuretools/tests/primitive_tests/test_absolute_diff.py
class TestAbsoluteDiff (line 8) | class TestAbsoluteDiff:
method test_nan (line 9) | def test_nan(self):
method test_regular (line 16) | def test_regular(self):
method test_method (line 23) | def test_method(self):
method test_limit (line 30) | def test_limit(self):
method test_zero (line 37) | def test_zero(self):
method test_empty (line 44) | def test_empty(self):
method test_inf (line 51) | def test_inf(self):
method test_raises (line 58) | def test_raises(self):
FILE: featuretools/tests/primitive_tests/test_agg_feats.py
function reset_dfs_cache (line 41) | def reset_dfs_cache():
function test_get_depth (line 46) | def test_get_depth(es):
function test_makes_count (line 68) | def test_makes_count(es):
function test_count_null (line 87) | def test_count_null(es):
function test_check_input_types (line 126) | def test_check_input_types(es):
function test_mean_nan (line 145) | def test_mean_nan(es):
function test_init_and_name (line 183) | def test_init_and_name(es):
function test_invalid_init_args (line 225) | def test_invalid_init_args(diamond_es):
function test_init_with_multiple_possible_paths (line 261) | def test_init_with_multiple_possible_paths(diamond_es):
function test_init_with_single_possible_path (line 283) | def test_init_with_single_possible_path(diamond_es):
function test_init_with_no_path (line 295) | def test_init_with_no_path(diamond_es):
function test_name_with_multiple_possible_paths (line 313) | def test_name_with_multiple_possible_paths(diamond_es):
function test_copy (line 326) | def test_copy(games_es):
function test_serialization (line 344) | def test_serialization(es):
function test_time_since_last (line 401) | def test_time_since_last(es):
function test_time_since_first (line 419) | def test_time_since_first(es):
function test_median (line 437) | def test_median(es):
function test_agg_same_method_name (line 454) | def test_agg_same_method_name(es):
function test_time_since_last_custom (line 530) | def test_time_since_last_custom(es):
function test_custom_primitive_multiple_inputs (line 563) | def test_custom_primitive_multiple_inputs(es):
function test_custom_primitive_default_kwargs (line 608) | def test_custom_primitive_default_kwargs(es):
function test_makes_numtrue (line 637) | def test_makes_numtrue(es):
function test_make_three_most_common (line 649) | def test_make_three_most_common(es):
function test_stacking_multi (line 698) | def test_stacking_multi(es):
function test_use_previous_pd_dateoffset (line 729) | def test_use_previous_pd_dateoffset(es):
function _assert_agg_feats_equal (line 747) | def _assert_agg_feats_equal(f1, f2):
function test_override_multi_feature_names (line 755) | def test_override_multi_feature_names(es):
FILE: featuretools/tests/primitive_tests/test_all_primitive_docstrings.py
function docstring_is_uniform (line 4) | def docstring_is_uniform(primitive):
function test_transform_primitive_docstrings (line 26) | def test_transform_primitive_docstrings():
function test_aggregation_primitive_docstrings (line 31) | def test_aggregation_primitive_docstrings():
FILE: featuretools/tests/primitive_tests/test_direct_features.py
function test_direct_from_identity (line 27) | def test_direct_from_identity(es):
function test_direct_from_column (line 39) | def test_direct_from_column(es):
function test_direct_rename_multioutput (line 52) | def test_direct_rename_multioutput(es):
function test_direct_rename (line 69) | def test_direct_rename(es):
function test_direct_copy (line 85) | def test_direct_copy(games_es):
function test_direct_of_multi_output_transform_feat (line 100) | def test_direct_of_multi_output_transform_feat(es):
function test_direct_features_of_multi_output_agg_primitives (line 140) | def test_direct_features_of_multi_output_agg_primitives(es):
function test_direct_with_invalid_init_args (line 202) | def test_direct_with_invalid_init_args(diamond_es):
function test_direct_with_multiple_possible_paths (line 225) | def test_direct_with_multiple_possible_paths(games_es):
function test_direct_with_single_possible_path (line 248) | def test_direct_with_single_possible_path(es):
function test_direct_with_no_path (line 254) | def test_direct_with_no_path(diamond_es):
function test_serialization (line 264) | def test_serialization(es):
FILE: featuretools/tests/primitive_tests/test_feature_base.py
function test_copy_features_does_not_copy_entityset (line 26) | def test_copy_features_does_not_copy_entityset(es):
function test_get_dependencies (line 58) | def test_get_dependencies(es):
function test_get_depth (line 76) | def test_get_depth(es):
function test_squared (line 95) | def test_squared(es):
function test_return_type_inference (line 104) | def test_return_type_inference(es):
function test_return_type_inference_direct_feature (line 116) | def test_return_type_inference_direct_feature(es):
function test_return_type_inference_index (line 129) | def test_return_type_inference_index(es):
function test_return_type_inference_datetime_time_index (line 139) | def test_return_type_inference_datetime_time_index(es):
function test_return_type_inference_numeric_time_index (line 148) | def test_return_type_inference_numeric_time_index(int_es):
function test_return_type_inference_id (line 157) | def test_return_type_inference_id(es):
function test_set_data_path (line 177) | def test_set_data_path(es):
function test_to_dictionary_direct (line 207) | def test_to_dictionary_direct(es):
function test_to_dictionary_identity (line 231) | def test_to_dictionary_identity(es):
function test_to_dictionary_agg (line 247) | def test_to_dictionary_agg(es):
function test_to_dictionary_where (line 278) | def test_to_dictionary_where(es):
function test_to_dictionary_trans (line 310) | def test_to_dictionary_trans(es):
function test_to_dictionary_groupby_trans (line 327) | def test_to_dictionary_groupby_trans(es):
function test_to_dictionary_multi_slice (line 350) | def test_to_dictionary_multi_slice(es):
function test_multi_output_base_error_agg (line 370) | def test_multi_output_base_error_agg(es):
function test_multi_output_base_error_trans (line 382) | def test_multi_output_base_error_trans(es):
function test_multi_output_attributes (line 396) | def test_multi_output_attributes(es):
function test_multi_output_index_error (line 413) | def test_multi_output_index_error(es):
function test_rename (line 438) | def test_rename(es):
function test_rename_multioutput (line 449) | def test_rename_multioutput(es):
function test_rename_featureoutputslice (line 460) | def test_rename_featureoutputslice(es):
function test_set_feature_names_wrong_number_of_names (line 472) | def test_set_feature_names_wrong_number_of_names(es):
function test_set_feature_names_not_unique (line 486) | def test_set_feature_names_not_unique(es):
function test_set_feature_names_error_on_single_output_feature (line 498) | def test_set_feature_names_error_on_single_output_feature(es):
function test_set_feature_names_transform_feature (line 506) | def test_set_feature_names_transform_feature(es):
function test_set_feature_names_aggregation_feature (line 519) | def test_set_feature_names_aggregation_feature(es):
function test_renaming_resets_feature_output_names_to_default (line 530) | def test_renaming_resets_feature_output_names_to_default(es):
function test_base_of_and_stack_on_heuristic (line 544) | def test_base_of_and_stack_on_heuristic(es, test_aggregation_primitive):
function test_stack_on_self (line 601) | def test_stack_on_self(es, test_transform_primitive):
FILE: featuretools/tests/primitive_tests/test_feature_descriptions.py
function test_identity_description (line 30) | def test_identity_description(es):
function test_direct_description (line 37) | def test_direct_description(es):
function test_transform_description (line 73) | def test_transform_description(es):
function test_groupby_transform_description (line 79) | def test_groupby_transform_description(es):
function test_aggregation_description (line 90) | def test_aggregation_description(es):
function test_aggregation_description_where (line 107) | def test_aggregation_description_where(es):
function test_aggregation_description_use_previous (line 126) | def test_aggregation_description_use_previous(es):
function test_multioutput_description (line 138) | def test_multioutput_description(es):
function test_generic_description (line 213) | def test_generic_description(es):
function test_column_description (line 257) | def test_column_description(es):
function test_metadata (line 267) | def test_metadata(es, tmp_path):
FILE: featuretools/tests/primitive_tests/test_feature_serialization.py
function reset_dfs_cache (line 73) | def reset_dfs_cache():
function assert_features (line 78) | def assert_features(original, deserialized):
function pickle_features_test_helper (line 84) | def pickle_features_test_helper(es_size, features_original, dir_path):
function test_pickle_features (line 111) | def test_pickle_features(es, tmp_path):
function test_pickle_features_with_custom_primitive (line 120) | def test_pickle_features_with_custom_primitive(es, tmp_path):
function test_serialized_renamed_features (line 137) | def test_serialized_renamed_features(es):
function s3_client (line 205) | def s3_client():
function s3_bucket (line 217) | def s3_bucket(s3_client, region="us-east-2"):
function test_serialize_features_mock_s3 (line 228) | def test_serialize_features_mock_s3(es, s3_client, s3_bucket):
function test_serialize_features_mock_anon_s3 (line 244) | def test_serialize_features_mock_anon_s3(es, s3_client, s3_bucket):
function test_s3_test_profile (line 261) | def test_s3_test_profile(es, s3_client, s3_bucket, setup_test_profile, p...
function test_deserialize_features_s3 (line 278) | def test_deserialize_features_s3(es, url, profile_name):
function test_serialize_url (line 306) | def test_serialize_url(es):
function test_custom_feature_names_retained_during_serialization (line 317) | def test_custom_feature_names_retained_during_serialization(es, tmp_path):
function test_deserializer_uses_common_primitive_instances_no_args (line 365) | def test_deserializer_uses_common_primitive_instances_no_args(es, tmp_pa...
function test_deserializer_uses_common_primitive_instances_with_args (line 402) | def test_deserializer_uses_common_primitive_instances_with_args(es, tmp_...
function test_can_serialize_word_set_for_number_of_common_words_feature (line 516) | def test_can_serialize_word_set_for_number_of_common_words_feature(es):
FILE: featuretools/tests/primitive_tests/test_feature_utils.py
function test_is_valid_input (line 7) | def test_is_valid_input():
FILE: featuretools/tests/primitive_tests/test_feature_visualizer.py
function simple_feat (line 21) | def simple_feat(es):
function trans_feat (line 26) | def trans_feat(es):
function test_returns_digraph_object (line 30) | def test_returns_digraph_object(simple_feat):
function test_saving_png_file (line 35) | def test_saving_png_file(simple_feat, tmp_path):
function test_missing_file_extension (line 41) | def test_missing_file_extension(simple_feat):
function test_invalid_format (line 47) | def test_invalid_format(simple_feat):
function test_transform (line 53) | def test_transform(es, trans_feat):
function test_html_symbols (line 76) | def test_html_symbols(es, tmp_path):
function test_groupby_transform (line 91) | def test_groupby_transform(es):
function test_groupby_transform_direct_groupby (line 133) | def test_groupby_transform_direct_groupby(es):
function test_aggregation (line 202) | def test_aggregation(es):
function test_multioutput (line 253) | def test_multioutput(es):
function test_direct (line 309) | def test_direct(es):
function test_stacked (line 373) | def test_stacked(es, trans_feat):
function test_description_auto_caption (line 420) | def test_description_auto_caption(trans_feat):
function test_description_auto_caption_metadata (line 426) | def test_description_auto_caption_metadata(trans_feat, tmp_path):
function test_description_custom_caption (line 454) | def test_description_custom_caption(trans_feat):
FILE: featuretools/tests/primitive_tests/test_features_deserializer.py
function test_single_feature (line 27) | def test_single_feature(es):
function test_multioutput_feature (line 43) | def test_multioutput_feature(es):
function test_base_features_in_list (line 86) | def test_base_features_in_list(es):
function test_base_features_not_in_list (line 110) | def test_base_features_not_in_list(es):
function test_later_schema_version (line 148) | def test_later_schema_version(es, caplog, hardcoded_schema_version, warns):
function test_earlier_schema_version (line 170) | def test_earlier_schema_version(es, caplog, hardcoded_schema_version, wa...
function test_unknown_feature_type (line 186) | def test_unknown_feature_type(es):
function test_unknown_primitive_type (line 204) | def test_unknown_primitive_type(es):
function test_unknown_primitive_module (line 228) | def test_unknown_primitive_module(es):
function test_feature_use_previous_pd_timedelta (line 252) | def test_feature_use_previous_pd_timedelta(es):
function test_feature_use_previous_pd_dateoffset (line 282) | def test_feature_use_previous_pd_dateoffset(es):
function test_word_set_in_number_of_common_words_is_deserialized_back_into_a_set (line 339) | def test_word_set_in_number_of_common_words_is_deserialized_back_into_a_...
function _check_schema_version (line 366) | def _check_schema_version(version, es, warning_text, caplog, warning_typ...
FILE: featuretools/tests/primitive_tests/test_features_serializer.py
function test_single_feature (line 23) | def test_single_feature(es):
function test_base_features_in_list (line 39) | def test_base_features_in_list(es):
function test_multi_output_features (line 66) | def test_multi_output_features(es):
function test_base_features_not_in_list (line 110) | def test_base_features_not_in_list(es):
function test_where_feature_dependency (line 145) | def test_where_feature_dependency(es):
function test_feature_use_previous_pd_timedelta (line 180) | def test_feature_use_previous_pd_timedelta(es):
function test_feature_use_previous_pd_dateoffset (line 214) | def test_feature_use_previous_pd_dateoffset(es):
function _compare_feature_dicts (line 278) | def _compare_feature_dicts(a_dict, b_dict):
FILE: featuretools/tests/primitive_tests/test_groupby_transform_primitives.py
class TestCumCount (line 23) | class TestCumCount:
method test_order (line 26) | def test_order(self):
method test_regular (line 35) | def test_regular(self):
method test_discrete (line 43) | def test_discrete(self):
class TestCumSum (line 52) | class TestCumSum:
method test_order (line 55) | def test_order(self):
method test_regular (line 65) | def test_regular(self):
class TestCumMean (line 75) | class TestCumMean:
method test_order (line 78) | def test_order(self):
method test_regular (line 88) | def test_regular(self):
class TestCumMax (line 98) | class TestCumMax:
method test_order (line 101) | def test_order(self):
method test_regular (line 111) | def test_regular(self):
class TestCumMin (line 121) | class TestCumMin:
method test_order (line 124) | def test_order(self):
method test_regular (line 134) | def test_regular(self):
function test_cum_sum (line 144) | def test_cum_sum(es):
function test_cum_min (line 164) | def test_cum_min(es):
function test_cum_max (line 184) | def test_cum_max(es):
function test_cum_sum_group_on_nan (line 204) | def test_cum_sum_group_on_nan(es):
function test_cum_sum_numpy_group_on_nan (line 257) | def test_cum_sum_numpy_group_on_nan(es):
function test_cum_handles_uses_full_dataframe (line 325) | def test_cum_handles_uses_full_dataframe(es):
function test_cum_mean (line 357) | def test_cum_mean(es):
function test_cum_count (line 377) | def test_cum_count(es):
function test_rename (line 396) | def test_rename(es):
function test_groupby_no_data (line 414) | def test_groupby_no_data(es):
function test_groupby_uses_calc_time (line 431) | def test_groupby_uses_calc_time(es):
function test_groupby_multi_output_stacking (line 488) | def test_groupby_multi_output_stacking(es):
function test_serialization (line 510) | def test_serialization(es):
function test_groupby_with_multioutput_primitive (line 536) | def test_groupby_with_multioutput_primitive(es):
function test_groupby_with_multioutput_primitive_custom_names (line 591) | def test_groupby_with_multioutput_primitive_custom_names(es):
FILE: featuretools/tests/primitive_tests/test_identity_features.py
function test_relationship_path (line 5) | def test_relationship_path(es):
function test_serialization (line 10) | def test_serialization(es):
FILE: featuretools/tests/primitive_tests/test_overrides.py
function test_overrides (line 34) | def test_overrides(es):
function test_override_boolean (line 86) | def test_override_boolean(es):
function test_scalar_overrides (line 112) | def test_scalar_overrides(es):
function test_override_cmp_from_column (line 182) | def test_override_cmp_from_column(es):
function test_override_cmp (line 199) | def test_override_cmp(es):
FILE: featuretools/tests/primitive_tests/test_primitive_base.py
function test_call_agg (line 11) | def test_call_agg():
function test_call_trans (line 19) | def test_call_trans():
function test_uses_calc_time (line 25) | def test_uses_calc_time():
function test_call_multiple_args (line 35) | def test_call_multiple_args():
function test_get_function_called_once (line 45) | def test_get_function_called_once():
function test_multiple_arg_string (line 66) | def test_multiple_arg_string():
function test_single_args_string (line 78) | def test_single_args_string():
function test_args_string_default (line 82) | def test_args_string_default():
function test_args_string_mixed (line 86) | def test_args_string_mixed():
function test_args_string_undefined (line 98) | def test_args_string_undefined():
function test_args_string_error (line 103) | def test_args_string_error():
FILE: featuretools/tests/primitive_tests/test_primitive_utils.py
function test_list_primitives_order (line 51) | def test_list_primitives_order():
function test_valid_input_types (line 75) | def test_valid_input_types():
function test_descriptions (line 87) | def test_descriptions():
function test_get_descriptions_doesnt_truncate_primitive_description (line 97) | def test_get_descriptions_doesnt_truncate_primitive_description():
function test_get_default_aggregation_primitives (line 145) | def test_get_default_aggregation_primitives():
function test_get_default_transform_primitives (line 162) | def test_get_default_transform_primitives():
function this_dir (line 178) | def this_dir():
function primitives_to_install_dir (line 183) | def primitives_to_install_dir(this_dir):
function bad_primitives_files_dir (line 188) | def bad_primitives_files_dir(this_dir):
function test_list_primitive_files (line 192) | def test_list_primitive_files(primitives_to_install_dir):
function test_load_primitive_from_file (line 200) | def test_load_primitive_from_file(primitives_to_install_dir):
function test_errors_more_than_one_primitive_in_file (line 206) | def test_errors_more_than_one_primitive_in_file(bad_primitives_files_dir):
function test_errors_no_primitive_in_file (line 214) | def test_errors_no_primitive_in_file(bad_primitives_files_dir):
function test_check_input_types (line 222) | def test_check_input_types():
function test_get_summary_primitives (line 254) | def test_get_summary_primitives():
function test_summarize_primitives (line 317) | def test_summarize_primitives():
FILE: featuretools/tests/primitive_tests/test_rolling_primitive_utils.py
function test_get_rolled_series_without_gap (line 23) | def test_get_rolled_series_without_gap(window_series):
function test_get_rolled_series_without_gap_not_uniform (line 31) | def test_get_rolled_series_without_gap_not_uniform(window_series):
function test_get_rolled_series_without_gap_empty_series (line 42) | def test_get_rolled_series_without_gap_empty_series(window_series):
function test_get_rolled_series_without_gap_large_bound (line 48) | def test_get_rolled_series_without_gap_large_bound(window_series):
function test_roll_series_with_gap (line 72) | def test_roll_series_with_gap(window_length, gap, window_series):
function test_roll_series_with_no_gap (line 117) | def test_roll_series_with_no_gap(window_length, window_series):
function test_roll_series_with_gap_early_values (line 138) | def test_roll_series_with_gap_early_values(window_length, gap, window_se...
function test_roll_series_with_gap_nullable_types (line 187) | def test_roll_series_with_gap_nullable_types(window_series):
function test_roll_series_with_gap_nullable_types_with_nans (line 211) | def test_roll_series_with_gap_nullable_types_with_nans(window_series):
function test_apply_roll_with_offset_gap (line 259) | def test_apply_roll_with_offset_gap(window_length, gap, window_series):
function test_apply_roll_with_offset_gap_default_min_periods (line 311) | def test_apply_roll_with_offset_gap_default_min_periods(min_periods, win...
function test_apply_roll_with_offset_gap_min_periods (line 345) | def test_apply_roll_with_offset_gap_min_periods(min_periods, window_seri...
function test_apply_roll_with_offset_gap_non_uniform (line 375) | def test_apply_roll_with_offset_gap_non_uniform():
function test_apply_roll_with_offset_data_frequency_higher_than_parameters_frequency (line 410) | def test_apply_roll_with_offset_data_frequency_higher_than_parameters_fr...
function test_apply_roll_with_offset_data_min_periods_too_big (line 471) | def test_apply_roll_with_offset_data_min_periods_too_big(window_series):
function test_roll_series_with_gap_different_input_types_same_result_uniform (line 493) | def test_roll_series_with_gap_different_input_types_same_result_uniform(
function test_roll_series_with_gap_incorrect_types (line 542) | def test_roll_series_with_gap_incorrect_types(window_series):
function test_roll_series_with_gap_negative_inputs (line 559) | def test_roll_series_with_gap_negative_inputs(window_series):
function test_roll_series_with_non_offset_string_inputs (line 569) | def test_roll_series_with_non_offset_string_inputs(window_series):
function test_no_call_to_apply_roll_with_offset_gap_with_numeric (line 610) | def test_no_call_to_apply_roll_with_offset_gap_with_numeric(
FILE: featuretools/tests/primitive_tests/test_transform_features.py
function test_init_and_name (line 88) | def test_init_and_name(es):
function test_relationship_path (line 139) | def test_relationship_path(es):
function test_serialization (line 145) | def test_serialization(es):
function test_make_trans_feat (line 165) | def test_make_trans_feat(es):
function simple_es (line 176) | def simple_es():
function test_equal_categorical (line 200) | def test_equal_categorical(simple_es):
function test_equal_different_dtypes (line 221) | def test_equal_different_dtypes(simple_es):
function test_not_equal_categorical (line 244) | def test_not_equal_categorical(simple_es):
function test_not_equal_different_dtypes (line 266) | def test_not_equal_different_dtypes(simple_es):
function test_diff (line 289) | def test_diff(es):
function test_diff_shift (line 328) | def test_diff_shift(es):
function test_diff_single_value (line 342) | def test_diff_single_value(es):
function test_diff_reordered (line 354) | def test_diff_reordered(es):
function test_diff_single_value_is_nan (line 368) | def test_diff_single_value_is_nan(es):
function test_diff_datetime (line 381) | def test_diff_datetime(es):
function test_diff_datetime_shift (line 412) | def test_diff_datetime_shift(es):
function test_compare_of_identity (line 434) | def test_compare_of_identity(es):
function test_compare_of_direct (line 459) | def test_compare_of_direct(es):
function test_compare_of_transform (line 485) | def test_compare_of_transform(es):
function test_compare_of_agg (line 503) | def test_compare_of_agg(es):
function test_compare_all_nans (line 534) | def test_compare_all_nans(es):
function test_arithmetic_of_val (line 556) | def test_arithmetic_of_val(es):
function test_arithmetic_two_vals_fails (line 588) | def test_arithmetic_two_vals_fails(es):
function test_arithmetic_of_identity (line 594) | def test_arithmetic_of_identity(es):
function test_arithmetic_of_direct (line 629) | def test_arithmetic_of_direct(es):
function boolean_mult_es (line 659) | def boolean_mult_es():
function test_boolean_multiply (line 679) | def test_boolean_multiply(boolean_mult_es):
function test_arithmetic_of_transform (line 703) | def test_arithmetic_of_transform(es):
function test_not_feature (line 728) | def test_not_feature(es):
function test_arithmetic_of_agg (line 737) | def test_arithmetic_of_agg(es):
function test_latlong (line 770) | def test_latlong(es):
function test_latlong_with_nan (line 798) | def test_latlong_with_nan(es):
function test_haversine (line 856) | def test_haversine(es):
function test_haversine_with_nan (line 923) | def test_haversine_with_nan(es):
function test_text_primitives (line 974) | def test_text_primitives(es):
function test_isin_feat (line 1013) | def test_isin_feat(es):
function test_isin_feat_other_syntax (line 1029) | def test_isin_feat_other_syntax(es):
function test_isin_feat_other_syntax_int (line 1042) | def test_isin_feat_other_syntax_int(es):
function test_isin_feat_custom (line 1055) | def test_isin_feat_custom(es):
function test_isnull_feat (line 1107) | def test_isnull_feat(es):
function test_percentile (line 1143) | def test_percentile(es):
function test_dependent_percentile (line 1155) | def test_dependent_percentile(es):
function test_agg_percentile (line 1168) | def test_agg_percentile(es):
function test_percentile_agg_percentile (line 1182) | def test_percentile_agg_percentile(es):
function test_percentile_agg (line 1200) | def test_percentile_agg(es):
function test_direct_percentile (line 1216) | def test_direct_percentile(es):
function test_direct_agg_percentile (line 1231) | def test_direct_agg_percentile(es):
function test_percentile_with_cutoff (line 1249) | def test_percentile_with_cutoff(es):
function test_two_kinds_of_dependents (line 1262) | def test_two_kinds_of_dependents(es):
function test_get_filepath (line 1287) | def test_get_filepath(es):
function test_override_multi_feature_names (line 1326) | def test_override_multi_feature_names(es):
function test_time_since_primitive_matches_all_datetime_types (line 1363) | def test_time_since_primitive_matches_all_datetime_types(es):
function test_cfm_with_numeric_lag_and_non_nullable_column (line 1383) | def test_cfm_with_numeric_lag_and_non_nullable_column(es):
function test_cfm_with_lag_and_non_nullable_columns (line 1426) | def test_cfm_with_lag_and_non_nullable_columns(es):
function test_comparisons_with_ordinal_valid_inputs_that_dont_work_but_should (line 1518) | def test_comparisons_with_ordinal_valid_inputs_that_dont_work_but_should...
function test_multiply_numeric_boolean (line 1548) | def test_multiply_numeric_boolean():
function test_multiply_numeric_boolean_multiple_dtypes_no_nulls (line 1571) | def test_multiply_numeric_boolean_multiple_dtypes_no_nulls():
function test_multiply_numeric_boolean_multiple_dtypes_with_nulls (line 1589) | def test_multiply_numeric_boolean_multiple_dtypes_with_nulls():
function test_feature_multiplication (line 1602) | def test_feature_multiplication(es):
FILE: featuretools/tests/primitive_tests/transform_primitive_tests/test_cumulative_time_since.py
class TestCumulativeTimeSinceLastTrue (line 17) | class TestCumulativeTimeSinceLastTrue(PrimitiveTestBase):
method test_regular (line 25) | def test_regular(self):
method test_all_false (line 30) | def test_all_false(self):
method test_all_nan (line 40) | def test_all_nan(self):
method test_some_nans (line 48) | def test_some_nans(self):
method test_with_featuretools (line 85) | def test_with_featuretools(self, es):
class TestCumulativeTimeSinceLastFalse (line 92) | class TestCumulativeTimeSinceLastFalse(PrimitiveTestBase):
method test_regular (line 100) | def test_regular(self):
method test_all_true (line 105) | def test_all_true(self):
method test_all_nan (line 115) | def test_all_nan(self):
method test_some_nans (line 123) | def test_some_nans(self):
method test_with_featuretools (line 160) | def test_with_featuretools(self, es):
FILE: featuretools/tests/primitive_tests/transform_primitive_tests/test_datetoholiday_primitive.py
function test_datetoholiday (line 10) | def test_datetoholiday():
function test_datetoholiday_error (line 30) | def test_datetoholiday_error():
function test_nat (line 36) | def test_nat():
function test_valid_country (line 51) | def test_valid_country():
function test_multiple_countries (line 65) | def test_multiple_countries():
function test_with_timezone_aware_datetimes (line 113) | def test_with_timezone_aware_datetimes():
FILE: featuretools/tests/primitive_tests/transform_primitive_tests/test_distancetoholiday_primitive.py
function test_dist
Condensed preview — 501 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (2,571K chars).
[
{
"path": ".codecov.yml",
"chars": 47,
"preview": "codecov:\n notify:\n after_n_builds: 5\n"
},
{
"path": ".github/ISSUE_TEMPLATE/blank_issue.md",
"chars": 90,
"preview": "---\nname: Blank Issue\nabout: Create a blank issue\ntitle: ''\nlabels: ''\nassignees: ''\n\n---\n"
},
{
"path": ".github/ISSUE_TEMPLATE/bug_report.md",
"chars": 418,
"preview": "---\nname: Bug Report\nabout: Create a bug report to help us improve Featuretools\ntitle: ''\nlabels: 'bug'\nassignees: ''\n\n-"
},
{
"path": ".github/ISSUE_TEMPLATE/config.yml",
"chars": 521,
"preview": "blank_issues_enabled: true\ncontact_links:\n - name: General Technical Question\n about: \"If you have a question like *"
},
{
"path": ".github/ISSUE_TEMPLATE/documentation_improvement.md",
"chars": 222,
"preview": "---\nname: Documentation Improvement\nabout: Suggest an idea for improving the documentation\ntitle: ''\nlabels: 'documentat"
},
{
"path": ".github/ISSUE_TEMPLATE/feature_request.md",
"chars": 249,
"preview": "---\nname: Feature Request\nabout: Suggest an idea for this project\ntitle: ''\nlabels: 'new feature'\nassignees: ''\n\n---\n\n- "
},
{
"path": ".github/auto_assign.yml",
"chars": 67,
"preview": "# Set to author to set pr creator as assignee\naddAssignees: author\n"
},
{
"path": ".github/workflows/auto_approve_dependency_PRs.yaml",
"chars": 1545,
"preview": "name: Auto Approve Dependency PRs\non:\n schedule:\n - cron: '*/30 * * * *'\n workflow_dispatch:\n workflow_run:\n "
},
{
"path": ".github/workflows/broken_link_check.yaml",
"chars": 724,
"preview": "name: Broken link check\non:\n workflow_dispatch:\n schedule:\n - cron: \"* * * * 1\"\n\njobs:\n my-broken-link-checker:\n "
},
{
"path": ".github/workflows/build_docs.yaml",
"chars": 1869,
"preview": "name: Build Docs\non:\n pull_request:\n types: [opened, synchronize]\n push:\n branches:\n - main\n workflow_disp"
},
{
"path": ".github/workflows/create_feedstock_pr.yaml",
"chars": 2360,
"preview": "on:\n workflow_dispatch:\n inputs:\n version:\n description: 'released PyPI version to use (ex - v1.11.1)'\n "
},
{
"path": ".github/workflows/install_test.yaml",
"chars": 1513,
"preview": "name: Install Test\non:\n pull_request:\n types: [opened, synchronize]\n push:\n branches:\n - main\nenv:\n ALTERY"
},
{
"path": ".github/workflows/kickoff_evalml_unit_tests.yaml",
"chars": 444,
"preview": "name: Kickoff EvalML Unit Tests\n\non:\n push:\n branches:\n - main\n workflow_dispatch:\n\njobs:\n kickoff:\n name:"
},
{
"path": ".github/workflows/latest_dependency_checker.yaml",
"chars": 1645,
"preview": "# This workflow will install dependenies and if any critical dependencies have changed a pull request\n# will be created "
},
{
"path": ".github/workflows/lint_check.yaml",
"chars": 1416,
"preview": "name: Lint Check\non:\n pull_request:\n types: [opened, synchronize]\n push:\n branches:\n - main\njobs:\n lint_ch"
},
{
"path": ".github/workflows/minimum_dependency_checker.yaml",
"chars": 2085,
"preview": "name: Minimum Dependency Checker\non:\n workflow_dispatch:\n push:\n branches:\n - main\n paths:\n - 'pyproje"
},
{
"path": ".github/workflows/performance-check.yaml",
"chars": 795,
"preview": "name: performance-check\non:\n push:\n branches:\n - main\n workflow_dispatch:\njobs:\n run-performance-analysis:\n "
},
{
"path": ".github/workflows/pull_request_check.yaml",
"chars": 494,
"preview": "name: Pull Request Check\non:\n pull_request:\n types: [opened, edited, reopened, synchronize]\njobs:\n pull_request_che"
},
{
"path": ".github/workflows/release.yaml",
"chars": 968,
"preview": "on:\n release:\n types: [published]\n\nname: Release\njobs:\n pypi-publish:\n name: PyPI Release\n runs-on: ubuntu-la"
},
{
"path": ".github/workflows/release_notes_updated.yaml",
"chars": 1401,
"preview": "name: Release Notes Updated\non:\n pull_request:\n types: [opened, synchronize]\njobs:\n release_notes_updated:\n name"
},
{
"path": ".github/workflows/test_without_test_dependencies.yaml",
"chars": 1390,
"preview": "name: Test without Test Dependencies\non:\n pull_request:\n types: [opened, synchronize]\n push:\n branches:\n - "
},
{
"path": ".github/workflows/tests_with_latest_deps.yaml",
"chars": 3991,
"preview": "name: Tests\non:\n pull_request:\n types: [opened, synchronize]\n push:\n branches:\n - main\n workflow_dispatch:"
},
{
"path": ".github/workflows/tests_with_minimum_deps.yaml",
"chars": 1703,
"preview": "name: Tests - Minimum Dependencies\non:\n pull_request:\n types: [opened, synchronize]\n push:\n branches:\n - ma"
},
{
"path": ".github/workflows/tests_with_woodwork_main_branch.yaml",
"chars": 2000,
"preview": "name: Tests - Featuretools with Woodwork main branch\non:\n workflow_dispatch:\njobs:\n tests_woodwork_main:\n if: ${{ g"
},
{
"path": ".gitignore",
"chars": 1931,
"preview": "#\ndocs/source/generated/\ndocs/source/getting_started/graphs\nvenv/\ndata/\ninstalled/\noutput.csv\nhtmlcov/\n.idea/\nfeaturetoo"
},
{
"path": ".pre-commit-config.yaml",
"chars": 1060,
"preview": "exclude: |\n (?x)\n .html$|.csv$|.svg$|.md$|.txt$|.json$|.xml$|.pickle$|^.github/|\n (LICENSE.*|README.*)\nrepos:\n - rep"
},
{
"path": ".readthedocs.yaml",
"chars": 638,
"preview": "# .readthedocs.yaml\n# Read the Docs configuration file\n# See https://docs.readthedocs.io/en/stable/config-file/v2.html f"
},
{
"path": "LICENSE",
"chars": 1518,
"preview": "BSD 3-Clause License\n\nCopyright (c) 2017, Feature Labs, Inc.\nAll rights reserved.\n\nRedistribution and use in source and "
},
{
"path": "Makefile",
"chars": 1638,
"preview": ".PHONY: clean\nclean:\n\tfind . -name '*.pyo' -delete\n\tfind . -name '*.pyc' -delete\n\tfind . -name __pycache__ -delete\n\tfind"
},
{
"path": "README.md",
"chars": 11159,
"preview": "<p align=\"center\">\n<img width=50% src=\"https://www.featuretools.com/wp-content/uploads/2017/12/FeatureLabs-Logo-Tangerin"
},
{
"path": "contributing.md",
"chars": 6448,
"preview": "# Contributing to Featuretools\n\n:+1::tada: First off, thank you for taking the time to contribute! :tada::+1:\n\nWhether y"
},
{
"path": "docs/Makefile",
"chars": 8164,
"preview": "# Makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line.\nSPHINXOPTS =\nSPHINXBUILD "
},
{
"path": "docs/backport_release.md",
"chars": 7770,
"preview": "# Backport Release Process\n\nIn situations where we need to backport commits to earlier versions of our software, we'll n"
},
{
"path": "docs/make.bat",
"chars": 7753,
"preview": "@ECHO OFF\r\n\r\nREM Command file for Sphinx documentation\r\n\r\nif \"%SPHINXBUILD%\" == \"\" (\r\n\tset SPHINXBUILD=sphinx-build\r\n)\r\n"
},
{
"path": "docs/notebook_version_standardizer.py",
"chars": 5577,
"preview": "import json\nimport os\n\nimport click\n\nDOCS_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), \"source\")\n\n\nde"
},
{
"path": "docs/pull_request_template.md",
"chars": 286,
"preview": "### Pull Request Description\n(replace this text with your description)\n\n-----\n*After creating the pull request: in order"
},
{
"path": "docs/source/_static/style.css",
"chars": 783,
"preview": ".footer {\n background-color: #0D2345;\n padding-bottom: 40px;\n padding-top: 40px;\n width: 100%;\n}\n\n.footer-ce"
},
{
"path": "docs/source/api_reference.rst",
"chars": 9111,
"preview": ".. _api_ref:\n\nAPI Reference\n=============\n\n.. currentmodule:: featuretools\n\nDemo Datasets\n~~~~~~~~~~~~~\n.. currentmodule"
},
{
"path": "docs/source/conf.py",
"chars": 13163,
"preview": "# -*- coding: utf-8 -*-\n#\n# featuretools documentation build configuration file, created by\n# sphinx-quickstart on Thu M"
},
{
"path": "docs/source/getting_started/afe.ipynb",
"chars": 8332,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# Deep Feature Synthesis\\n\",\n \"\\"
},
{
"path": "docs/source/getting_started/getting_started_index.rst",
"chars": 305,
"preview": "Getting Started\n---------------\n\nFor a quick introduction to Featuretools, check out our :ref:`5 minute quick start guid"
},
{
"path": "docs/source/getting_started/handling_time.ipynb",
"chars": 27919,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"id\": \"a8104f18\",\n \"metadata\": {},\n \"source\": [\n \"# Handling Tim"
},
{
"path": "docs/source/getting_started/primitives.ipynb",
"chars": 14524,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"raw\",\n \"metadata\": {\n \"raw_mimetype\": \"text/restructuredtext\"\n },\n \"source\":"
},
{
"path": "docs/source/getting_started/using_entitysets.ipynb",
"chars": 11578,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# Representing Data with EntitySets"
},
{
"path": "docs/source/getting_started/woodwork_types.ipynb",
"chars": 16003,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"id\": \"b95b28c1\",\n \"metadata\": {},\n \"source\": [\n \"# Woodwork Typ"
},
{
"path": "docs/source/guides/advanced_custom_primitives.ipynb",
"chars": 10645,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# Advanced Custom Primitives Guide\""
},
{
"path": "docs/source/guides/deployment.ipynb",
"chars": 5162,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"id\": \"92a0dab5\",\n \"metadata\": {},\n \"source\": [\n \"# Deployment\\n"
},
{
"path": "docs/source/guides/feature_descriptions.ipynb",
"chars": 12736,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"id\": \"1557274d\",\n \"metadata\": {},\n \"source\": [\n \"# Generating F"
},
{
"path": "docs/source/guides/feature_selection.ipynb",
"chars": 11370,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# Feature Selection\\n\",\n \"\\n\",\n "
},
{
"path": "docs/source/guides/guides_index.rst",
"chars": 299,
"preview": "Guides\n---------------\n\nGuides on more advanced Featuretools functionality\n\n.. toctree::\n :maxdepth: 1\n\n tuning_dfs\n"
},
{
"path": "docs/source/guides/performance.ipynb",
"chars": 11970,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"raw\",\n \"id\": \"2c5291f3\",\n \"metadata\": {\n \"raw_mimetype\": \"text/restructuredtex"
},
{
"path": "docs/source/guides/specifying_primitive_options.ipynb",
"chars": 11460,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"id\": \"ba92172a\",\n \"metadata\": {},\n \"source\": [\n \"# Specifying P"
},
{
"path": "docs/source/guides/sql_database_integration.ipynb",
"chars": 4656,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# SQL Database Integration \\n\",\n "
},
{
"path": "docs/source/guides/time_series.ipynb",
"chars": 12528,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"execution_count\": null,\n \"id\": \"17f894b5\",\n \"metadata\": {\n \"nbsphin"
},
{
"path": "docs/source/guides/tuning_dfs.ipynb",
"chars": 7999,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"id\": \"a4329c7d\",\n \"metadata\": {},\n \"source\": [\n \"# Tuning Deep "
},
{
"path": "docs/source/index.ipynb",
"chars": 10239,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"raw\",\n \"id\": \"25bd9564\",\n \"metadata\": {\n \"raw_mimetype\": \"text/restructuredtex"
},
{
"path": "docs/source/install.md",
"chars": 3230,
"preview": "# Install\n\nFeaturetools is available for Python 3.9 - 3.12. It can be installed from [pypi](https://pypi.org/project/fea"
},
{
"path": "docs/source/release_notes.rst",
"chars": 106847,
"preview": ".. _release_notes:\n\nRelease Notes\n-------------\n\nFuture Release\n==============\n * Enhancements\n * Fixes\n * Chan"
},
{
"path": "docs/source/resources/ecosystem.rst",
"chars": 5901,
"preview": ":description: A list of libraries, use cases / demos, and tutorials that leverage Featuretools\n\n========================"
},
{
"path": "docs/source/resources/frequently_asked_questions.ipynb",
"chars": 73395,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# Frequently Asked Questions\\n\",\n "
},
{
"path": "docs/source/resources/help.rst",
"chars": 1690,
"preview": "Help\n====\n\nCouldn't find what you were looking for?\nThe Featuretools community is happy to provide support to users of F"
},
{
"path": "docs/source/resources/resources_index.rst",
"chars": 229,
"preview": "Resources\n---------\n\nFrequently asked questions and additional resources\n\n.. toctree::\n :maxdepth: 1\n\n transition_to"
},
{
"path": "docs/source/resources/transition_to_ft_v1.0.ipynb",
"chars": 38244,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"id\": \"6004844f\",\n \"metadata\": {},\n \"source\": [\n \"# Transitionin"
},
{
"path": "docs/source/resources/usage_tips/glossary.rst",
"chars": 2022,
"preview": ".. _glossary:\n.. currentmodule:: featuretools\n\nGlossary\n========\n\n.. glossary::\n :sorted:\n\n feature\n A tran"
},
{
"path": "docs/source/resources/usage_tips/limitations.rst",
"chars": 548,
"preview": "Limitations\n-----------\nIn-memory\n*********\n\nFeaturetools is intended to be run on datasets that can fit in memory on on"
},
{
"path": "docs/source/set-headers.py",
"chars": 142,
"preview": "import urllib.request\n\nopener = urllib.request.build_opener()\nopener.addheaders = [(\"Testing\", \"True\")]\nurllib.request.i"
},
{
"path": "docs/source/setup.py",
"chars": 1071,
"preview": "import os\n\nimport featuretools as ft\n\n\ndef load_feature_plots():\n es = ft.demo.load_mock_customer(return_entityset=Tr"
},
{
"path": "docs/source/templates/layout.html",
"chars": 2129,
"preview": "{% extends \"!layout.html\" %}\n\n{%- block extrahead %}\n\n\n{% set image = 'https://alteryx-oss-web-images.s3.amazonaws.com/O"
},
{
"path": "featuretools/__init__.py",
"chars": 2152,
"preview": "# flake8: noqa\nfrom featuretools.version import __version__\nfrom featuretools.config_init import config\nfrom featuretool"
},
{
"path": "featuretools/__main__.py",
"chars": 0,
"preview": ""
},
{
"path": "featuretools/computational_backends/__init__.py",
"chars": 69,
"preview": "# flake8: noqa\nfrom featuretools.computational_backends.api import *\n"
},
{
"path": "featuretools/computational_backends/api.py",
"chars": 283,
"preview": "# flake8: noqa\nfrom featuretools.computational_backends.calculate_feature_matrix import (\n approximate_features,\n "
},
{
"path": "featuretools/computational_backends/calculate_feature_matrix.py",
"chars": 35852,
"preview": "import logging\nimport math\nimport os\nimport shutil\nimport time\nimport warnings\nfrom datetime import datetime\n\nimport clo"
},
{
"path": "featuretools/computational_backends/feature_set.py",
"chars": 9251,
"preview": "import itertools\nimport logging\nfrom collections import defaultdict\n\nfrom featuretools.entityset.relationship import Rel"
},
{
"path": "featuretools/computational_backends/feature_set_calculator.py",
"chars": 34233,
"preview": "from datetime import datetime\nfrom functools import partial\n\nimport numpy as np\nimport pandas as pd\nimport pandas.api.ty"
},
{
"path": "featuretools/computational_backends/utils.py",
"chars": 14497,
"preview": "import logging\nimport os\nimport typing\nimport warnings\nfrom datetime import datetime\nfrom functools import wraps\n\nimport"
},
{
"path": "featuretools/config_init.py",
"chars": 2078,
"preview": "import copy\nimport logging\nimport os\nimport sys\n\n\ndef initialize_logging():\n loggers = {}\n\n # Check for environmen"
},
{
"path": "featuretools/demo/__init__.py",
"chars": 51,
"preview": "# flake8: noqa\nfrom featuretools.demo.api import *\n"
},
{
"path": "featuretools/demo/api.py",
"chars": 227,
"preview": "# flake8: noqa\nfrom featuretools.demo.flight import load_flight\nfrom featuretools.demo.mock_customer import load_mock_cu"
},
{
"path": "featuretools/demo/flight.py",
"chars": 9667,
"preview": "import math\nimport re\n\nimport pandas as pd\nfrom tqdm import tqdm\nfrom woodwork.logical_types import Boolean, Categorical"
},
{
"path": "featuretools/demo/mock_customer.py",
"chars": 4072,
"preview": "import pandas as pd\nfrom numpy import random\nfrom numpy.random import choice\nfrom woodwork.logical_types import Categori"
},
{
"path": "featuretools/demo/retail.py",
"chars": 3466,
"preview": "import pandas as pd\nfrom woodwork.logical_types import NaturalLanguage\n\nimport featuretools as ft\n\n\ndef load_retail(id=\""
},
{
"path": "featuretools/demo/weather.py",
"chars": 923,
"preview": "import pandas as pd\n\nimport featuretools as ft\n\n\ndef load_weather(nrows=None, return_single_table=False):\n \"\"\"\n Lo"
},
{
"path": "featuretools/entityset/__init__.py",
"chars": 56,
"preview": "# flake8: noqa\nfrom featuretools.entityset.api import *\n"
},
{
"path": "featuretools/entityset/api.py",
"chars": 248,
"preview": "# flake8: noqa\nfrom featuretools.entityset.deserialize import read_entityset\nfrom featuretools.entityset.entityset impor"
},
{
"path": "featuretools/entityset/deserialize.py",
"chars": 6574,
"preview": "import json\nimport os\nimport tarfile\nimport tempfile\nfrom inspect import getfullargspec\n\nimport pandas as pd\nimport wood"
},
{
"path": "featuretools/entityset/entityset.py",
"chars": 70843,
"preview": "import copy\nimport logging\nimport warnings\nfrom collections import defaultdict\n\nimport numpy as np\nimport pandas as pd\nf"
},
{
"path": "featuretools/entityset/relationship.py",
"chars": 6709,
"preview": "class Relationship(object):\n \"\"\"Class to represent a relationship between dataframes\n\n See Also:\n :class:`."
},
{
"path": "featuretools/entityset/serialize.py",
"chars": 3389,
"preview": "import datetime\nimport json\nimport os\nimport tarfile\nimport tempfile\n\nfrom woodwork.serializers.serializer_base import t"
},
{
"path": "featuretools/entityset/timedelta.py",
"chars": 6575,
"preview": "import pandas as pd\nfrom dateutil.relativedelta import relativedelta\n\n\nclass Timedelta(object):\n \"\"\"Represents differ"
},
{
"path": "featuretools/exceptions.py",
"chars": 178,
"preview": "class UnknownFeature(Exception):\n def __init__(self, *args, **kwargs):\n Exception.__init__(self, *args, **kwar"
},
{
"path": "featuretools/feature_base/__init__.py",
"chars": 59,
"preview": "# flake8: noqa\nfrom featuretools.feature_base.api import *\n"
},
{
"path": "featuretools/feature_base/api.py",
"chars": 532,
"preview": "# flake8: noqa\nfrom featuretools.feature_base.feature_base import (\n AggregationFeature,\n DirectFeature,\n Featu"
},
{
"path": "featuretools/feature_base/cache.py",
"chars": 1618,
"preview": "\"\"\"\ncache.py\n\nCustom caching class, currently used for FeatureBase\n\"\"\"\n\n# needed for defaultdict annotation if < python "
},
{
"path": "featuretools/feature_base/feature_base.py",
"chars": 37146,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Boolean, BooleanNullable\n\nfrom featur"
},
{
"path": "featuretools/feature_base/feature_descriptions.py",
"chars": 6819,
"preview": "import json\n\nimport featuretools as ft\n\n\ndef describe_feature(\n feature,\n feature_descriptions=None,\n primitive"
},
{
"path": "featuretools/feature_base/feature_visualizer.py",
"chars": 9889,
"preview": "import html\n\nfrom featuretools.feature_base.feature_base import (\n AggregationFeature,\n DirectFeature,\n Feature"
},
{
"path": "featuretools/feature_base/features_deserializer.py",
"chars": 5377,
"preview": "import json\n\nfrom featuretools.entityset.deserialize import (\n description_to_entityset as deserialize_es,\n)\nfrom fea"
},
{
"path": "featuretools/feature_base/features_serializer.py",
"chars": 6039,
"preview": "import json\n\nfrom featuretools.primitives.utils import serialize_primitive\nfrom featuretools.utils.s3_utils import get_t"
},
{
"path": "featuretools/feature_base/utils.py",
"chars": 388,
"preview": "def is_valid_input(candidate, template):\n \"\"\"Checks if a candidate schema should be considered a match for a template"
},
{
"path": "featuretools/feature_discovery/FeatureCollection.py",
"chars": 9039,
"preview": "from __future__ import annotations\n\nimport hashlib\nfrom itertools import combinations\nfrom typing import Any, Dict, List"
},
{
"path": "featuretools/feature_discovery/LiteFeature.py",
"chars": 9826,
"preview": "from __future__ import annotations\n\nimport hashlib\nfrom dataclasses import field\nfrom functools import total_ordering\nfr"
},
{
"path": "featuretools/feature_discovery/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "featuretools/feature_discovery/convertors.py",
"chars": 6120,
"preview": "from __future__ import annotations\n\nfrom typing import Dict, List\n\nimport pandas as pd\nfrom woodwork.logical_types impor"
},
{
"path": "featuretools/feature_discovery/feature_discovery.py",
"chars": 12093,
"preview": "import inspect\nfrom collections import defaultdict\nfrom itertools import combinations, permutations, product\nfrom typing"
},
{
"path": "featuretools/feature_discovery/type_defs.py",
"chars": 12,
"preview": "ANY = \"ANY\"\n"
},
{
"path": "featuretools/feature_discovery/utils.py",
"chars": 2270,
"preview": "import hashlib\nimport json\nfrom functools import lru_cache\nfrom typing import Any, Dict, Tuple\n\nfrom woodwork.column_sch"
},
{
"path": "featuretools/primitives/__init__.py",
"chars": 2800,
"preview": "# flake8: noqa\nimport inspect\nimport logging\nimport traceback\n\nimport pkg_resources\n\nfrom featuretools.primitives.standa"
},
{
"path": "featuretools/primitives/base/__init__.py",
"chars": 244,
"preview": "from featuretools.primitives.base.aggregation_primitive_base import AggregationPrimitive\nfrom featuretools.primitives.ba"
},
{
"path": "featuretools/primitives/base/aggregation_primitive_base.py",
"chars": 1057,
"preview": "from featuretools.primitives.base.primitive_base import PrimitiveBase\n\n\nclass AggregationPrimitive(PrimitiveBase):\n d"
},
{
"path": "featuretools/primitives/base/primitive_base.py",
"chars": 5845,
"preview": "import os\nfrom inspect import signature\n\nimport numpy as np\nimport pandas as pd\n\nfrom featuretools import config\nfrom fe"
},
{
"path": "featuretools/primitives/base/transform_primitive_base.py",
"chars": 817,
"preview": "from featuretools.primitives.base.primitive_base import PrimitiveBase\n\n\nclass TransformPrimitive(PrimitiveBase):\n \"\"\""
},
{
"path": "featuretools/primitives/options_utils.py",
"chars": 12514,
"preview": "import logging\nimport warnings\nfrom itertools import permutations\n\nfrom featuretools import primitives\nfrom featuretools"
},
{
"path": "featuretools/primitives/standard/__init__.py",
"chars": 305,
"preview": "# flake8: noqa\nfrom featuretools.primitives.base.aggregation_primitive_base import AggregationPrimitive\nfrom featuretool"
},
{
"path": "featuretools/primitives/standard/aggregation/__init__.py",
"chars": 5915,
"preview": "from featuretools.primitives.standard.aggregation.all_primitive import All\nfrom featuretools.primitives.standard.aggrega"
},
{
"path": "featuretools/primitives/standard/aggregation/all_primitive.py",
"chars": 849,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Boolean, BooleanNu"
},
{
"path": "featuretools/primitives/standard/aggregation/any_primitive.py",
"chars": 854,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Boolean, BooleanNu"
},
{
"path": "featuretools/primitives/standard/aggregation/average_count_per_unique.py",
"chars": 1740,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Double\n\nfrom featuretools.primitives."
},
{
"path": "featuretools/primitives/standard/aggregation/avg_time_between.py",
"chars": 2680,
"preview": "from datetime import datetime\n\nimport numpy as np\nimport pandas as pd\nfrom woodwork.column_schema import ColumnSchema\nfr"
},
{
"path": "featuretools/primitives/standard/aggregation/count.py",
"chars": 985,
"preview": "import pandas as pd\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import IntegerNullable\n\n"
},
{
"path": "featuretools/primitives/standard/aggregation/count_above_mean.py",
"chars": 1264,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import IntegerNullable\n\nf"
},
{
"path": "featuretools/primitives/standard/aggregation/count_below_mean.py",
"chars": 1265,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import IntegerNullable\n\nf"
},
{
"path": "featuretools/primitives/standard/aggregation/count_greater_than.py",
"chars": 1030,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Integer\n\nfrom featuretools.primitives"
},
{
"path": "featuretools/primitives/standard/aggregation/count_inside_nth_std.py",
"chars": 1132,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Integer\n\nfrom feat"
},
{
"path": "featuretools/primitives/standard/aggregation/count_inside_range.py",
"chars": 1650,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import IntegerNullable\n\nf"
},
{
"path": "featuretools/primitives/standard/aggregation/count_less_than.py",
"chars": 1005,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Integer\n\nfrom featuretools.primitives"
},
{
"path": "featuretools/primitives/standard/aggregation/count_outside_nth_std.py",
"chars": 1128,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Integer\n\nfrom feat"
},
{
"path": "featuretools/primitives/standard/aggregation/count_outside_range.py",
"chars": 1598,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import IntegerNullable\n\nf"
},
{
"path": "featuretools/primitives/standard/aggregation/date_first_event.py",
"chars": 1043,
"preview": "from pandas import NaT\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Datetime\n\nfrom"
},
{
"path": "featuretools/primitives/standard/aggregation/entropy.py",
"chars": 1453,
"preview": "from scipy import stats\nfrom woodwork.column_schema import ColumnSchema\n\nfrom featuretools.primitives.base.aggregation_p"
},
{
"path": "featuretools/primitives/standard/aggregation/first.py",
"chars": 590,
"preview": "from woodwork.column_schema import ColumnSchema\n\nfrom featuretools.primitives.base.aggregation_primitive_base import Agg"
},
{
"path": "featuretools/primitives/standard/aggregation/first_last_time_delta.py",
"chars": 1250,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Datetime, Double\n\n"
},
{
"path": "featuretools/primitives/standard/aggregation/has_no_duplicates.py",
"chars": 1494,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import BooleanNullable\n\nfrom featuretools.pr"
},
{
"path": "featuretools/primitives/standard/aggregation/is_monotonically_decreasing.py",
"chars": 1008,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import BooleanNullable\n\nfrom featuretools.pr"
},
{
"path": "featuretools/primitives/standard/aggregation/is_monotonically_increasing.py",
"chars": 1008,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import BooleanNullable\n\nfrom featuretools.pr"
},
{
"path": "featuretools/primitives/standard/aggregation/is_unique.py",
"chars": 1035,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import BooleanNullable\n\nfrom featuretools.pr"
},
{
"path": "featuretools/primitives/standard/aggregation/kurtosis.py",
"chars": 2570,
"preview": "from scipy.stats import kurtosis\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Doub"
},
{
"path": "featuretools/primitives/standard/aggregation/last.py",
"chars": 582,
"preview": "from woodwork.column_schema import ColumnSchema\n\nfrom featuretools.primitives.base.aggregation_primitive_base import Agg"
},
{
"path": "featuretools/primitives/standard/aggregation/max_consecutive_false.py",
"chars": 1907,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Boolean, Integer\n\nfrom featuretools.p"
},
{
"path": "featuretools/primitives/standard/aggregation/max_consecutive_negatives.py",
"chars": 2451,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Double, Integer\n\nfrom featuretools.pr"
},
{
"path": "featuretools/primitives/standard/aggregation/max_consecutive_positives.py",
"chars": 2447,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Double, Integer\n\nfrom featuretools.pr"
},
{
"path": "featuretools/primitives/standard/aggregation/max_consecutive_true.py",
"chars": 1759,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Boolean, Integer\n\nfrom featuretools.p"
},
{
"path": "featuretools/primitives/standard/aggregation/max_consecutive_zeros.py",
"chars": 2379,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Double, Integer\n\nfrom featuretools.pr"
},
{
"path": "featuretools/primitives/standard/aggregation/max_count.py",
"chars": 1247,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\n\nfrom featuretools.primitives.base import Aggregation"
},
{
"path": "featuretools/primitives/standard/aggregation/max_min_delta.py",
"chars": 1126,
"preview": "from woodwork.column_schema import ColumnSchema\n\nfrom featuretools.primitives.base import AggregationPrimitive\n\n\nclass M"
},
{
"path": "featuretools/primitives/standard/aggregation/max_primitive.py",
"chars": 610,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\n\nfrom featuretools.primitives.base.aggregation_primit"
},
{
"path": "featuretools/primitives/standard/aggregation/mean.py",
"chars": 1092,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\n\nfrom featuretools.primitives.base.aggregation_primit"
},
{
"path": "featuretools/primitives/standard/aggregation/median.py",
"chars": 694,
"preview": "import pandas as pd\nfrom woodwork.column_schema import ColumnSchema\n\nfrom featuretools.primitives.base.aggregation_primi"
},
{
"path": "featuretools/primitives/standard/aggregation/median_count.py",
"chars": 1396,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import IntegerNullable\n\nf"
},
{
"path": "featuretools/primitives/standard/aggregation/min_count.py",
"chars": 1328,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import IntegerNullable\n\nf"
},
{
"path": "featuretools/primitives/standard/aggregation/min_primitive.py",
"chars": 611,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\n\nfrom featuretools.primitives.base.aggregation_primit"
},
{
"path": "featuretools/primitives/standard/aggregation/mode.py",
"chars": 808,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\n\nfrom featuretools.primitives.base.aggregation_primit"
},
{
"path": "featuretools/primitives/standard/aggregation/n_most_common.py",
"chars": 1687,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\n\nfrom featuretools.primitives.base.aggregation_primit"
},
{
"path": "featuretools/primitives/standard/aggregation/n_most_common_frequency.py",
"chars": 2345,
"preview": "import numpy as np\nimport pandas as pd\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types impor"
},
{
"path": "featuretools/primitives/standard/aggregation/n_unique_days.py",
"chars": 1188,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Datetime, Integer\n\nfrom featuretools."
},
{
"path": "featuretools/primitives/standard/aggregation/n_unique_days_of_calendar_year.py",
"chars": 1275,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Datetime, Integer\n\nfrom featuretools."
},
{
"path": "featuretools/primitives/standard/aggregation/n_unique_days_of_month.py",
"chars": 1290,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Datetime, Integer\n\nfrom featuretools."
},
{
"path": "featuretools/primitives/standard/aggregation/n_unique_months.py",
"chars": 1262,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Datetime, Integer\n\nfrom featuretools."
},
{
"path": "featuretools/primitives/standard/aggregation/n_unique_weeks.py",
"chars": 1284,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Datetime, Integer\n\nfrom featuretools."
},
{
"path": "featuretools/primitives/standard/aggregation/num_consecutive_greater_mean.py",
"chars": 2828,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import IntegerNullable\n\nf"
},
{
"path": "featuretools/primitives/standard/aggregation/num_consecutive_less_mean.py",
"chars": 2799,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import IntegerNullable\n\nf"
},
{
"path": "featuretools/primitives/standard/aggregation/num_false_since_last_true.py",
"chars": 1587,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Boolean, IntegerNu"
},
{
"path": "featuretools/primitives/standard/aggregation/num_peaks.py",
"chars": 1032,
"preview": "import pandas as pd\nfrom scipy.signal import find_peaks\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.lo"
},
{
"path": "featuretools/primitives/standard/aggregation/num_true.py",
"chars": 961,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Boolean, BooleanNu"
},
{
"path": "featuretools/primitives/standard/aggregation/num_true_since_last_false.py",
"chars": 1455,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Boolean, IntegerNu"
},
{
"path": "featuretools/primitives/standard/aggregation/num_unique.py",
"chars": 1423,
"preview": "import pandas as pd\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import IntegerNullable\n\n"
},
{
"path": "featuretools/primitives/standard/aggregation/num_zero_crossings.py",
"chars": 1168,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Integer\n\nfrom feat"
},
{
"path": "featuretools/primitives/standard/aggregation/percent_true.py",
"chars": 1147,
"preview": "import pandas as pd\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Boolean, BooleanN"
},
{
"path": "featuretools/primitives/standard/aggregation/percent_unique.py",
"chars": 1392,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Double\n\nfrom featuretools.primitives."
},
{
"path": "featuretools/primitives/standard/aggregation/skew.py",
"chars": 877,
"preview": "import pandas as pd\nfrom woodwork.column_schema import ColumnSchema\n\nfrom featuretools.primitives.base.aggregation_primi"
},
{
"path": "featuretools/primitives/standard/aggregation/std.py",
"chars": 648,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\n\nfrom featuretools.primitives.base.aggregation_primit"
},
{
"path": "featuretools/primitives/standard/aggregation/sum_primitive.py",
"chars": 723,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\n\nfrom featuretools.primitives.base.aggregation_primit"
},
{
"path": "featuretools/primitives/standard/aggregation/time_since_first.py",
"chars": 2121,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Datetime, Double\n\nfrom featuretools.p"
},
{
"path": "featuretools/primitives/standard/aggregation/time_since_last.py",
"chars": 2124,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Datetime, Double\n\nfrom featuretools.p"
},
{
"path": "featuretools/primitives/standard/aggregation/time_since_last_false.py",
"chars": 2223,
"preview": "import numpy as np\nimport pandas as pd\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types impor"
},
{
"path": "featuretools/primitives/standard/aggregation/time_since_last_max.py",
"chars": 1969,
"preview": "import numpy as np\nimport pandas as pd\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types impor"
},
{
"path": "featuretools/primitives/standard/aggregation/time_since_last_min.py",
"chars": 1969,
"preview": "import numpy as np\nimport pandas as pd\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types impor"
},
{
"path": "featuretools/primitives/standard/aggregation/time_since_last_true.py",
"chars": 2207,
"preview": "import numpy as np\nimport pandas as pd\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types impor"
},
{
"path": "featuretools/primitives/standard/aggregation/trend.py",
"chars": 1371,
"preview": "import pandas as pd\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Datetime\n\nfrom fe"
},
{
"path": "featuretools/primitives/standard/aggregation/variance.py",
"chars": 1007,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Double\n\nfrom featu"
},
{
"path": "featuretools/primitives/standard/transform/__init__.py",
"chars": 1685,
"preview": "# flake8: noqa\nfrom featuretools.primitives.standard.transform.absolute_diff import AbsoluteDiff\nfrom featuretools.primi"
},
{
"path": "featuretools/primitives/standard/transform/absolute_diff.py",
"chars": 2599,
"preview": "from woodwork.column_schema import ColumnSchema\n\nfrom featuretools.primitives.base import TransformPrimitive\n\n\nclass Abs"
},
{
"path": "featuretools/primitives/standard/transform/binary/__init__.py",
"chars": 2970,
"preview": "from featuretools.primitives.standard.transform.binary.add_numeric import AddNumeric\nfrom featuretools.primitives.standa"
},
{
"path": "featuretools/primitives/standard/transform/binary/add_numeric.py",
"chars": 1000,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\n\nfrom featuretools.primitives.base.transform_primitiv"
},
{
"path": "featuretools/primitives/standard/transform/binary/add_numeric_scalar.py",
"chars": 1039,
"preview": "from woodwork.column_schema import ColumnSchema\n\nfrom featuretools.primitives.base.transform_primitive_base import Trans"
},
{
"path": "featuretools/primitives/standard/transform/binary/and_primitive.py",
"chars": 1481,
"preview": "import numpy as np\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import Boolean, BooleanNu"
},
{
"path": "featuretools/primitives/standard/transform/binary/divide_by_feature.py",
"chars": 1116,
"preview": "from woodwork.column_schema import ColumnSchema\n\nfrom featuretools.primitives.base.transform_primitive_base import Trans"
},
{
"path": "featuretools/primitives/standard/transform/binary/divide_numeric.py",
"chars": 1412,
"preview": "from woodwork.column_schema import ColumnSchema\n\nfrom featuretools.primitives.base.transform_primitive_base import Trans"
},
{
"path": "featuretools/primitives/standard/transform/binary/divide_numeric_scalar.py",
"chars": 1101,
"preview": "from woodwork.column_schema import ColumnSchema\n\nfrom featuretools.primitives.base.transform_primitive_base import Trans"
},
{
"path": "featuretools/primitives/standard/transform/binary/equal.py",
"chars": 1627,
"preview": "import pandas as pd\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import BooleanNullable\n\n"
},
{
"path": "featuretools/primitives/standard/transform/binary/equal_scalar.py",
"chars": 1092,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import BooleanNullable\n\nfrom featuretools.pr"
},
{
"path": "featuretools/primitives/standard/transform/binary/greater_than.py",
"chars": 2076,
"preview": "import numpy as np\nimport pandas as pd\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types impor"
},
{
"path": "featuretools/primitives/standard/transform/binary/greater_than_equal_to.py",
"chars": 2166,
"preview": "import numpy as np\nimport pandas as pd\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types impor"
},
{
"path": "featuretools/primitives/standard/transform/binary/greater_than_equal_to_scalar.py",
"chars": 1345,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import BooleanNullable\n\nfrom featuretools.pr"
},
{
"path": "featuretools/primitives/standard/transform/binary/greater_than_scalar.py",
"chars": 1226,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import BooleanNullable\n\nfrom featuretools.pr"
},
{
"path": "featuretools/primitives/standard/transform/binary/less_than.py",
"chars": 2047,
"preview": "import numpy as np\nimport pandas as pd\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types impor"
},
{
"path": "featuretools/primitives/standard/transform/binary/less_than_equal_to.py",
"chars": 2136,
"preview": "import numpy as np\nimport pandas as pd\nfrom woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types impor"
},
{
"path": "featuretools/primitives/standard/transform/binary/less_than_equal_to_scalar.py",
"chars": 1314,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import BooleanNullable\n\nfrom featuretools.pr"
},
{
"path": "featuretools/primitives/standard/transform/binary/less_than_scalar.py",
"chars": 1196,
"preview": "from woodwork.column_schema import ColumnSchema\nfrom woodwork.logical_types import BooleanNullable\n\nfrom featuretools.pr"
},
{
"path": "featuretools/primitives/standard/transform/binary/modulo_by_feature.py",
"chars": 1148,
"preview": "from woodwork.column_schema import ColumnSchema\n\nfrom featuretools.primitives.base.transform_primitive_base import Trans"
}
]
// ... and 301 more files (download for full content)
About this extraction
This page contains the full source code of the alteryx/featuretools GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 501 files (2.3 MB), approximately 629.8k tokens, and a symbol index with 2634 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.